1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/APValue.h"
20 #include "clang/AST/Attr.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/AST/OpenMPClause.h"
23 #include "clang/AST/StmtOpenMP.h"
24 #include "clang/AST/StmtVisitor.h"
25 #include "clang/Basic/BitmaskEnum.h"
26 #include "clang/Basic/FileManager.h"
27 #include "clang/Basic/OpenMPKinds.h"
28 #include "clang/Basic/SourceManager.h"
29 #include "clang/CodeGen/ConstantInitBuilder.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/SetOperations.h"
32 #include "llvm/ADT/SmallBitVector.h"
33 #include "llvm/ADT/StringExtras.h"
34 #include "llvm/Bitcode/BitcodeReader.h"
35 #include "llvm/IR/Constants.h"
36 #include "llvm/IR/DerivedTypes.h"
37 #include "llvm/IR/GlobalValue.h"
38 #include "llvm/IR/InstrTypes.h"
39 #include "llvm/IR/Value.h"
40 #include "llvm/Support/AtomicOrdering.h"
41 #include "llvm/Support/Format.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include <cassert>
44 #include <numeric>
45 
46 using namespace clang;
47 using namespace CodeGen;
48 using namespace llvm::omp;
49 
50 namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Create region info for an outlined region backed by the captured
  /// statement \p CS.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Create region info without a captured statement; used by inlined
  /// regions that reuse the captures of the enclosing context.
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a resume point for untied tasks. No-op here; overridden by
  /// regions that support untied task scheduling.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Return the specific kind of this OpenMP region.
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// Return the OpenMP directive this region was created for.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// Whether the region was created with cancellation support (value
  /// supplied by the creator of the region).
  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI: any captured-statement info whose kind is CR_OpenMP
  /// is a CGOpenMPRegionInfo.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Specific kind of this region (parallel/task/inlined/target).
  CGOpenMPRegionKind RegionKind;
  /// Code generation sequence for the region body.
  RegionCodeGenTy CodeGen;
  /// Directive kind that produced this region.
  OpenMPDirectiveKind Kind;
  /// Whether the region supports cancellation.
  bool HasCancel;
};
112 
/// API for captured statement code generation in OpenMP constructs
/// outlined as a standalone 'parallel' region.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// LLVM-style RTTI: an OpenMP region info of the ParallelOutlinedRegion
  /// kind.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name used for the outlined capture helper function.
  StringRef HelperName;
};
145 
/// API for captured statement code generation in OpenMP constructs
/// outlined as a standalone 'task' region.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Action that emits the dispatch machinery needed to suspend/resume an
  /// untied task at task scheduling points.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True when the task is untied (note: the constructor takes 'Tied').
    bool Untied;
    /// Parameter holding a pointer to the current part id of the task.
    const VarDecl *PartIDVar;
    /// Codegen sequence run at each switching point before branching away.
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch over the part id; a case is added per generated resume point.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    /// On region entry (untied only): load the current part id and switch
    /// to the matching resume block; case 0 is the initial entry point.
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit a task switching point in the middle of the region: store the
    /// next part id, run UntiedCodeGen, branch out through the return block,
    /// and register a new switch case where execution resumes next time.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // The next case index doubles as the next part id.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of parts (switch cases) generated so far.
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Delegate untied-switch emission to the associated action.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  /// LLVM-style RTTI: an OpenMP region info of the TaskOutlinedRegion kind.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
234 
/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries are delegated to the enclosing (outer) region
/// info, if one exists.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Forward the context value to the outer region, which owns it.
  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  /// Forward the 'this' capture field query to the outer region, if any.
  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper. Note: this intentionally queries
  /// the saved CSI (which may be a non-OpenMP captured-statement info), not
  /// the OuterRegionInfo member.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  /// Forward untied-switch emission to the outer region, if any.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// Captured-statement info that was active before this inlined region.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  /// LLVM-style RTTI: an OpenMP region info of the InlinedRegion kind.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to an OpenMP region info, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
317 
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// LLVM-style RTTI: an OpenMP region info of the TargetRegion kind.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Unique name of the target region, provided by the client.
  StringRef HelperName;
};
346 
/// Placeholder codegen callback for regions that only wrap expressions and
/// therefore must never emit a statement body.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      // Locals and parameters do not need the privatization below.
      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      // Build a reference to the global and register its current address
      // as the "private" copy in this scope.
      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  /// This info is never identified via RTTI.
  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
408 
/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  /// Saved lambda capture map, restored on destruction (NoInheritance only).
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  /// Saved lambda 'this' capture field (NoInheritance only).
  FieldDecl *LambdaThisCaptureField = nullptr;
  /// Saved block info (NoInheritance only).
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  /// If true, lambda/block capture state is hidden from the inlined region.
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      // Stash the lambda/block capture state so the inlined region does not
      // see it; the destructor restores it.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      // Bring back the lambda/block capture state saved in the constructor.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
451 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  /// (Deliberately shares 0x40 with OMP_IDENT_BARRIER_IMPL, matching kmp.h.)
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
480 
namespace {
// Enable bitwise operators for the flag enums declared in this namespace.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Device IDs reserved by the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
506 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
547 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// These are high bits OR'ed into the base schedule value.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
579 
580 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
581 /// region.
582 class CleanupTy final : public EHScopeStack::Cleanup {
583   PrePostActionTy *Action;
584 
585 public:
586   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
587   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
588     if (!CGF.HaveInsertPoint())
589       return;
590     Action->Exit(CGF);
591   }
592 };
593 
594 } // anonymous namespace
595 
/// Run the stored codegen callback inside a cleanups scope, wrapping it
/// with the pre|post action if one was supplied.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    // Schedule the action's Exit hook as a normal+EH cleanup so it also
    // runs on exceptional paths, then invoke the callback with the action.
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    // No external action supplied: pass a default-constructed action.
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
606 
607 /// Check if the combiner is a call to UDR combiner and if it is so return the
608 /// UDR decl used for reduction.
609 static const OMPDeclareReductionDecl *
610 getReductionInit(const Expr *ReductionOp) {
611   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
612     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
613       if (const auto *DRE =
614               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
615         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
616           return DRD;
617   return nullptr;
618 }
619 
/// Emit the initialization of a reduction private copy.
/// If \p DRD has an explicit initializer, privatize the initializer call's
/// operands to \p Private / \p Original and evaluate the UDR initializer
/// expression \p InitOp. Otherwise, initialize \p Private from a null
/// constant of type \p Ty.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    // InitOp is a call whose two address-of arguments reference the
    // "private" and "original" placeholder variables; rebind those
    // variables to the given addresses before emitting the call.
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    // Map the opaque callee to the second function of the UDR pair and
    // emit the initializer call for its side effects.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No user-defined initializer: materialize a zero value of type Ty in a
    // private global and copy it into Private.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      // Aggregates are copied directly from the global's lvalue.
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
673 
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, initialize each element with the
/// user-defined reduction initializer through \p DRD; otherwise emit \p Init
/// directly into each element.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction declaration, if any.
/// \param SrcAddr Address of the original array (used only when \p DRD is
/// non-null).
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // Source element pointer advances in lockstep with the destination, but
  // only when a declare-reduction initializer needs the original values.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
763 
764 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
765   return CGF.EmitOMPSharedLValue(E);
766 }
767 
768 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
769                                             const Expr *E) {
770   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
771     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
772   return LValue();
773 }
774 
775 void ReductionCodeGen::emitAggregateInitialization(
776     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
777     const OMPDeclareReductionDecl *DRD) {
778   // Emit VarDecl with copy init for arrays.
779   // Get the address of the original variable captured in current
780   // captured region.
781   const auto *PrivateVD =
782       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
783   bool EmitDeclareReductionInit =
784       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
785   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
786                        EmitDeclareReductionInit,
787                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
788                                                 : PrivateVD->getInit(),
789                        DRD, SharedAddr);
790 }
791 
792 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
793                                    ArrayRef<const Expr *> Origs,
794                                    ArrayRef<const Expr *> Privates,
795                                    ArrayRef<const Expr *> ReductionOps) {
796   ClausesData.reserve(Shareds.size());
797   SharedAddresses.reserve(Shareds.size());
798   Sizes.reserve(Shareds.size());
799   BaseDecls.reserve(Shareds.size());
800   const auto *IOrig = Origs.begin();
801   const auto *IPriv = Privates.begin();
802   const auto *IRed = ReductionOps.begin();
803   for (const Expr *Ref : Shareds) {
804     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
805     std::advance(IOrig, 1);
806     std::advance(IPriv, 1);
807     std::advance(IRed, 1);
808   }
809 }
810 
811 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
812   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
813          "Number of generated lvalues must be exactly N.");
814   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
815   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
816   SharedAddresses.emplace_back(First, Second);
817   if (ClausesData[N].Shared == ClausesData[N].Ref) {
818     OrigAddresses.emplace_back(First, Second);
819   } else {
820     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
821     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
822     OrigAddresses.emplace_back(First, Second);
823   }
824 }
825 
/// Compute and record the size of reduction item N, both in bytes and (for
/// variably-modified types) in number of elements, and emit the private
/// type with the element count bound to its VLA size expression.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Statically-sized type: record the byte size only; no dynamic element
    // count is needed.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Array section: element count = (UB pointer - LB pointer) + 1, then
    // scale by the element size to get bytes.
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole variably-modified type: start from the byte size and divide by
    // the element size to recover the element count.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed element count while the
  // variably-modified private type is emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
859 
/// Variant used when the element count \p Size for item N has already been
/// computed by the caller: only re-emits the variably-modified private type
/// with that count bound to its VLA size expression.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-sized items carry no dynamic size.
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind the VLA size expression to Size while emitting the type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
876 
/// Emit the initializer for the N-th private reduction copy at
/// \p PrivateAddr. \p DefaultInit is invoked to perform caller-provided
/// default initialization; a 'true' result from it suppresses the fallback
/// initialization in the last branch.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Non-null only for user-defined ('declare reduction') reductions.
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array reduction item: initialize element-wise.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar with a user-defined initializer (or no private init of its own).
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private variable's own non-trivial initializer when
    // DefaultInit did not handle initialization.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
901 
902 bool ReductionCodeGen::needCleanups(unsigned N) {
903   QualType PrivateType = getPrivateType(N);
904   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
905   return DTorKind != QualType::DK_none;
906 }
907 
908 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
909                                     Address PrivateAddr) {
910   QualType PrivateType = getPrivateType(N);
911   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
912   if (needCleanups(N)) {
913     PrivateAddr = CGF.Builder.CreateElementBitCast(
914         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
915     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
916   }
917 }
918 
/// Peels pointer/reference layers off \p BaseLV (loading through each one)
/// until the peeled type matches \p ElTy, then returns an lvalue for the
/// final address reinterpreted with ElTy's memory representation.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      // Pointer layer: load the pointee address.
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      // Reference layer: load through the reference.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  // Re-type the resulting address as ElTy while preserving the original
  // lvalue's base info and TBAA metadata.
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
938 
/// Rebuilds the chain of pointer temporaries needed so that \p Addr can be
/// reached through the same sequence of loads as the original base: one
/// memory temporary per pointer/reference layer between \p BaseTy and
/// \p ElTy, each outer temporary storing the address of the next inner one,
/// with \p Addr stored in the innermost. Returns the outermost temporary, or
/// \p OriginalBaseAddress re-pointed at \p Addr when no layers exist.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      // Link the previously created temporary to the new one.
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      // Remember the outermost temporary; it is what the caller receives.
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    // Store the target address into the innermost temporary and return the
    // outermost one.
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  // No indirection layers: cast Addr to the original base pointer type.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr);
}
967 
968 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
969   const VarDecl *OrigVD = nullptr;
970   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
971     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
972     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
973       Base = TempOASE->getBase()->IgnoreParenImpCasts();
974     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
975       Base = TempASE->getBase()->IgnoreParenImpCasts();
976     DE = cast<DeclRefExpr>(Base);
977     OrigVD = cast<VarDecl>(DE->getDecl());
978   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
979     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
980     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
981       Base = TempASE->getBase()->IgnoreParenImpCasts();
982     DE = cast<DeclRefExpr>(Base);
983     OrigVD = cast<VarDecl>(DE->getDecl());
984   }
985   return OrigVD;
986 }
987 
/// Adjusts \p PrivateAddr for a reduction item that is an array section or
/// subscript: the private copy must be addressed at the same element offset
/// relative to its base as the shared expression is relative to the original
/// variable.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    // Resolve the base variable's address through all pointer/reference
    // indirections down to the shared expression's element type.
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    // Offset (in elements) from the shared section's start back to the base
    // of the original variable.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    // Apply the same offset to the private copy.
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF), Ptr);
  }
  // Plain variable reference: no adjustment required.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1014 
1015 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1016   const OMPDeclareReductionDecl *DRD =
1017       getReductionInit(ClausesData[N].ReductionOp);
1018   return DRD && DRD->getInitializer();
1019 }
1020 
1021 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1022   return CGF.EmitLoadOfPointerLValue(
1023       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1024       getThreadIDVariable()->getType()->castAs<PointerType>());
1025 }
1026 
/// Emits the region body \p S inside a terminate scope so that exceptions
/// cannot escape the structured block.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  // Bail out when there is no current insertion point.
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    // Update the profile counter for the region's statement.
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1041 
1042 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1043     CodeGenFunction &CGF) {
1044   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1045                             getThreadIDVariable()->getType(),
1046                             AlignmentSource::Decl);
1047 }
1048 
1049 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1050                                        QualType FieldTy) {
1051   auto *Field = FieldDecl::Create(
1052       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1053       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1054       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1055   Field->setAccess(AS_public);
1056   DC->addDecl(Field);
1057   return Field;
1058 }
1059 
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // The kmp_critical_name lock type: an [8 x i32] array.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  // Pick up offload entries previously recorded in module metadata.
  loadOffloadInfoMetadata();
}
1070 
1071 void CGOpenMPRuntime::clear() {
1072   InternalVars.clear();
1073   // Clean non-target variable declarations possibly used only in debug info.
1074   for (const auto &Data : EmittedNonTargetVariables) {
1075     if (!Data.getValue().pointsToAliveValue())
1076       continue;
1077     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1078     if (!GV)
1079       continue;
1080     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1081       continue;
1082     GV->eraseFromParent();
1083   }
1084 }
1085 
1086 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1087   SmallString<128> Buffer;
1088   llvm::raw_svector_ostream OS(Buffer);
1089   StringRef Sep = FirstSeparator;
1090   for (StringRef Part : Parts) {
1091     OS << Sep << Part;
1092     Sep = Separator;
1093   }
1094   return std::string(OS.str());
1095 }
1096 
/// Emits the outlined helper for a 'declare reduction' combiner or
/// initializer. \p In and \p Out are the omp_in/omp_out (combiner) or
/// omp_orig/omp_priv (initializer) variable declarations; both are
/// privatized to alias the two pointer parameters. \p CombinerInitializer
/// is the expression to emit, or null for an initializer that only runs
/// 'Out''s own initializer.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *omp_out, Ty *omp_in);  (out is pushed first)
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // These helpers are small; mark them always-inline in optimized builds.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress(CGF));
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress(CGF));
  (void)Scope.Privatize();
  // For initializers emitted without a call-style init expression, run the
  // 'Out' variable's own non-trivial initializer on the out parameter.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1151 
1152 void CGOpenMPRuntime::emitUserDefinedReduction(
1153     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1154   if (UDRMap.count(D) > 0)
1155     return;
1156   llvm::Function *Combiner = emitCombinerOrInitializer(
1157       CGM, D->getType(), D->getCombiner(),
1158       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1159       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1160       /*IsCombiner=*/true);
1161   llvm::Function *Initializer = nullptr;
1162   if (const Expr *Init = D->getInitializer()) {
1163     Initializer = emitCombinerOrInitializer(
1164         CGM, D->getType(),
1165         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1166                                                                      : nullptr,
1167         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1168         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1169         /*IsCombiner=*/false);
1170   }
1171   UDRMap.try_emplace(D, Combiner, Initializer);
1172   if (CGF) {
1173     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1174     Decls.second.push_back(D);
1175   }
1176 }
1177 
1178 std::pair<llvm::Function *, llvm::Function *>
1179 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1180   auto I = UDRMap.find(D);
1181   if (I != UDRMap.end())
1182     return I->second;
1183   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1184   return UDRMap.lookup(D);
1185 }
1186 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  /// Pushes a finalization callback for a region of kind \p Kind onto the
  /// OpenMPIRBuilder's finalization stack. A null \p OMPBuilder makes both
  /// constructor and destructor no-ops.
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  /// Pops the callback pushed by the constructor (if any was pushed).
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  // Non-owning; null disables the push/pop entirely.
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1231 
1232 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1233     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1234     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1235     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1236   assert(ThreadIDVar->getType()->isPointerType() &&
1237          "thread id variable must be of type kmp_int32 *");
1238   CodeGenFunction CGF(CGM, true);
1239   bool HasCancel = false;
1240   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1241     HasCancel = OPD->hasCancel();
1242   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1243     HasCancel = OPD->hasCancel();
1244   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1245     HasCancel = OPSD->hasCancel();
1246   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1247     HasCancel = OPFD->hasCancel();
1248   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1249     HasCancel = OPFD->hasCancel();
1250   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1251     HasCancel = OPFD->hasCancel();
1252   else if (const auto *OPFD =
1253                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1254     HasCancel = OPFD->hasCancel();
1255   else if (const auto *OPFD =
1256                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1257     HasCancel = OPFD->hasCancel();
1258 
1259   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1260   //       parallel region to make cancellation barriers work properly.
1261   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1262   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1263   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1264                                     HasCancel, OutlinedHelperName);
1265   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1266   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1267 }
1268 
1269 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1270     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1271     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1272   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1273   return emitParallelOrTeamsOutlinedFunction(
1274       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1275 }
1276 
1277 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1278     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1279     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1280   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1281   return emitParallelOrTeamsOutlinedFunction(
1282       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1283 }
1284 
/// Outlines the captured statement of a task(loop) directive. For untied
/// tasks an UntiedTaskActionTy is installed that re-schedules the task via
/// __kmpc_omp_task, and \p NumberOfParts receives the number of task parts.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Untied-task callback: call __kmpc_omp_task(loc, tid, task_t*) with the
  // task descriptor loaded from TaskTVar to re-enqueue the task.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  // Attach the action before the captured statement is emitted below.
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Taskloop-class directives capture their statement under OMPD_taskloop.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Only these task-related directive forms may carry a 'cancel'.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // The part count is only reported for untied tasks.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1331 
1332 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1333                                              bool AtCurrentPoint) {
1334   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1335   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1336 
1337   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1338   if (AtCurrentPoint) {
1339     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1340         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1341   } else {
1342     Elem.second.ServiceInsertPt =
1343         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1344     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1345   }
1346 }
1347 
1348 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1349   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1350   if (Elem.second.ServiceInsertPt) {
1351     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1352     Elem.second.ServiceInsertPt = nullptr;
1353     Ptr->eraseFromParent();
1354   }
1355 }
1356 
1357 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1358                                                   SourceLocation Loc,
1359                                                   SmallString<128> &Buffer) {
1360   llvm::raw_svector_ostream OS(Buffer);
1361   // Build debug location
1362   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1363   OS << ";" << PLoc.getFilename() << ";";
1364   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1365     OS << FD->getQualifiedNameAsString();
1366   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1367   return OS.str();
1368 }
1369 
1370 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1371                                                  SourceLocation Loc,
1372                                                  unsigned Flags) {
1373   uint32_t SrcLocStrSize;
1374   llvm::Constant *SrcLocStr;
1375   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1376       Loc.isInvalid()) {
1377     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1378   } else {
1379     std::string FunctionName;
1380     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1381       FunctionName = FD->getQualifiedNameAsString();
1382     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1383     const char *FileName = PLoc.getFilename();
1384     unsigned Line = PLoc.getLine();
1385     unsigned Column = PLoc.getColumn();
1386     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1387                                                 Column, SrcLocStrSize);
1388   }
1389   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1390   return OMPBuilder.getOrCreateIdent(
1391       SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1392 }
1393 
/// Return the OpenMP global thread id (gtid) for the current function,
/// emitting and caching a __kmpc_global_thread_num call if needed.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only reuse the thread-id argument when it is safe w.r.t. exception
      // handling: either no landing pad is required, or the load happens in
      // the entry block / the same block as the thread-id pointer, so the
      // value cannot be used across an EH edge where it might be invalid.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the runtime call at the dedicated service insertion point so the
  // cached value dominates all later uses in the function.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1462 
1463 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1464   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1465   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1466     clearLocThreadIdInsertPt(CGF);
1467     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1468   }
1469   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1470     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1471       UDRMap.erase(D);
1472     FunctionUDRMap.erase(CGF.CurFn);
1473   }
1474   auto I = FunctionUDMMap.find(CGF.CurFn);
1475   if (I != FunctionUDMMap.end()) {
1476     for(const auto *D : I->second)
1477       UDMMap.erase(D);
1478     FunctionUDMMap.erase(I);
1479   }
1480   LastprivateConditionalToTypes.erase(CGF.CurFn);
1481   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1482 }
1483 
/// Return the ident_t * type used for source-location arguments of OpenMP
/// runtime calls; the type itself is owned by the OpenMPIRBuilder.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}
1487 
1488 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1489   if (!Kmpc_MicroTy) {
1490     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1491     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1492                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1493     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1494   }
1495   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1496 }
1497 
1498 llvm::FunctionCallee
1499 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1500                                              bool IsGPUDistribute) {
1501   assert((IVSize == 32 || IVSize == 64) &&
1502          "IV size is not compatible with the omp runtime");
1503   StringRef Name;
1504   if (IsGPUDistribute)
1505     Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1506                                     : "__kmpc_distribute_static_init_4u")
1507                         : (IVSigned ? "__kmpc_distribute_static_init_8"
1508                                     : "__kmpc_distribute_static_init_8u");
1509   else
1510     Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1511                                     : "__kmpc_for_static_init_4u")
1512                         : (IVSigned ? "__kmpc_for_static_init_8"
1513                                     : "__kmpc_for_static_init_8u");
1514 
1515   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1516   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1517   llvm::Type *TypeParams[] = {
1518     getIdentTyPointerTy(),                     // loc
1519     CGM.Int32Ty,                               // tid
1520     CGM.Int32Ty,                               // schedtype
1521     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1522     PtrTy,                                     // p_lower
1523     PtrTy,                                     // p_upper
1524     PtrTy,                                     // p_stride
1525     ITy,                                       // incr
1526     ITy                                        // chunk
1527   };
1528   auto *FnTy =
1529       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1530   return CGM.CreateRuntimeFunction(FnTy, Name);
1531 }
1532 
1533 llvm::FunctionCallee
1534 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1535   assert((IVSize == 32 || IVSize == 64) &&
1536          "IV size is not compatible with the omp runtime");
1537   StringRef Name =
1538       IVSize == 32
1539           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1540           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1541   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1542   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1543                                CGM.Int32Ty,           // tid
1544                                CGM.Int32Ty,           // schedtype
1545                                ITy,                   // lower
1546                                ITy,                   // upper
1547                                ITy,                   // stride
1548                                ITy                    // chunk
1549   };
1550   auto *FnTy =
1551       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1552   return CGM.CreateRuntimeFunction(FnTy, Name);
1553 }
1554 
1555 llvm::FunctionCallee
1556 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1557   assert((IVSize == 32 || IVSize == 64) &&
1558          "IV size is not compatible with the omp runtime");
1559   StringRef Name =
1560       IVSize == 32
1561           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1562           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1563   llvm::Type *TypeParams[] = {
1564       getIdentTyPointerTy(), // loc
1565       CGM.Int32Ty,           // tid
1566   };
1567   auto *FnTy =
1568       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1569   return CGM.CreateRuntimeFunction(FnTy, Name);
1570 }
1571 
1572 llvm::FunctionCallee
1573 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1574   assert((IVSize == 32 || IVSize == 64) &&
1575          "IV size is not compatible with the omp runtime");
1576   StringRef Name =
1577       IVSize == 32
1578           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1579           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1580   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1581   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1582   llvm::Type *TypeParams[] = {
1583     getIdentTyPointerTy(),                     // loc
1584     CGM.Int32Ty,                               // tid
1585     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1586     PtrTy,                                     // p_lower
1587     PtrTy,                                     // p_upper
1588     PtrTy                                      // p_stride
1589   };
1590   auto *FnTy =
1591       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1592   return CGM.CreateRuntimeFunction(FnTy, Name);
1593 }
1594 
1595 /// Obtain information that uniquely identifies a target entry. This
1596 /// consists of the file and device IDs as well as line number associated with
1597 /// the relevant entry source location.
1598 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1599                                      unsigned &DeviceID, unsigned &FileID,
1600                                      unsigned &LineNum) {
1601   SourceManager &SM = C.getSourceManager();
1602 
1603   // The loc should be always valid and have a file ID (the user cannot use
1604   // #pragma directives in macros)
1605 
1606   assert(Loc.isValid() && "Source location is expected to be always valid.");
1607 
1608   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1609   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1610 
1611   llvm::sys::fs::UniqueID ID;
1612   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1613     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1614     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1615     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1616       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1617           << PLoc.getFilename() << EC.message();
1618   }
1619 
1620   DeviceID = ID.getDevice();
1621   FileID = ID.getFile();
1622   LineNum = PLoc.getLine();
1623 }
1624 
/// Return the address of the indirection pointer used to reference a
/// "declare target link" variable (or a "to" variable under unified shared
/// memory), creating and registering the pointer on first use. Returns an
/// invalid Address when no indirection is needed.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // In simd-only mode no offloading happens, so no indirection is needed.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    // Build the name "<mangled>[_<fileid>]_decl_tgt_ref_ptr"; the file id is
    // mixed in for internal-linkage variables to keep the name unique across
    // translation units.
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
    llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(PtrTy);
    if (!Ptr) {
      // First use: create the pointer, initialize it on the host, and
      // register it with the offload-entry machinery.
      Ptr = getOrCreateInternalVariable(LlvmPtrTy, PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the device the runtime fills in the pointer; only the host knows
      // the variable's address statically.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1663 
1664 llvm::Constant *
1665 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1666   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1667          !CGM.getContext().getTargetInfo().isTLSSupported());
1668   // Lookup the entry, lazily creating it if necessary.
1669   std::string Suffix = getName({"cache", ""});
1670   return getOrCreateInternalVariable(
1671       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1672 }
1673 
/// Return the address of the current thread's copy of a threadprivate
/// variable: either the TLS address itself, or the result of a
/// __kmpc_threadprivate_cached runtime call.
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  // With native TLS support the variable's own address is already
  // thread-local; no runtime call is needed.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  // __kmpc_threadprivate_cached(loc, gtid, &var, size, &cache)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
      CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
      getOrCreateThreadPrivateCache(VD)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
          Args),
      CGF.Int8Ty, VDAddr.getAlignment());
}
1695 
1696 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1697     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1698     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1699   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1700   // library.
1701   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1702   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1703                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1704                       OMPLoc);
1705   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1706   // to register constructor/destructor for variable.
1707   llvm::Value *Args[] = {
1708       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1709       Ctor, CopyCtor, Dtor};
1710   CGF.EmitRuntimeCall(
1711       OMPBuilder.getOrCreateRuntimeFunction(
1712           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1713       Args);
1714 }
1715 
/// Emit the per-variable ctor/dtor helper functions for a threadprivate
/// variable definition and register them with the runtime. When \p CGF is
/// null a dedicated "__omp_threadprivate_init_" function is synthesized and
/// returned so the registration can run at global-init time; otherwise the
/// registration is emitted into \p CGF and nullptr is returned.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Native TLS handles construction/destruction; nothing to emit.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Only emit once per variable (tracked by mangled name).
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // The ctor takes the address of the thread's copy as a void* and
      // returns it.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg(ArgVal, CtorCGF.Int8Ty, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      // Run the variable's initializer on the thread-local copy.
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Return the (reloaded) destination pointer, as the runtime expects.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // The dtor takes the address of the thread's copy as a void*.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(
          Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      // Pass a typed null when no ctor is needed.
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      // Pass a typed null when no dtor is needed.
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No active function: synthesize a standalone init function that does
      // the registration, to be run as a global initializer by the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1836 
/// Emit (and register as offload entries) the ctor/dtor helpers for a
/// "declare target" variable definition. Returns true when the variable's
/// regular host emission should be suppressed (i.e. on the device side).
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // No offloading configured at all: nothing to do.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Link variables (and "to" under unified shared memory) are handled via the
  // indirection pointer, not here.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Only emit once per variable (tracked by mangled name).
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc, false,
          llvm::GlobalValue::WeakODRLinkage);
      // On AMDGCN the ctor must be a kernel so the runtime can launch it.
      if (CGM.getTriple().isAMDGCN())
        Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      // The initializer is emitted against the generic (AS 0) view of the
      // variable's address.
      llvm::Constant *AddrInAS0 = Addr;
      if (Addr->getAddressSpace() != 0)
        AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
            Addr, llvm::PointerType::getWithSamePointeeType(
                      cast<llvm::PointerType>(Addr->getType()), 0));
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(AddrInAS0, Addr->getValueType(),
                                       CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
    } else {
      // Host side: only a placeholder global is needed to identify the entry.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc, false,
          llvm::GlobalValue::WeakODRLinkage);
      // On AMDGCN the dtor must be a kernel so the runtime can launch it.
      if (CGM.getTriple().isAMDGCN())
        Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      // Destroy through the generic (AS 0) view of the variable's address.
      llvm::Constant *AddrInAS0 = Addr;
      if (Addr->getAddressSpace() != 0)
        AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
            Addr, llvm::PointerType::getWithSamePointeeType(
                      cast<llvm::PointerType>(Addr->getType()), 0));
      DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(),
                                  CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
    } else {
      // Host side: only a placeholder global is needed to identify the entry.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
1967 
/// Return the address of a compiler-generated ("artificial") threadprivate
/// variable identified by \p Name: a TLS global when the target supports TLS,
/// otherwise a __kmpc_threadprivate_cached runtime call.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::GlobalVariable *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  // Fast path: mark the global thread-local and return it directly.
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Slow path: __kmpc_threadprivate_cached(loc, gtid, &var, size, &cache),
  // with a dedicated cache global per artificial variable.
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
}
1999 
2000 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2001                                    const RegionCodeGenTy &ThenGen,
2002                                    const RegionCodeGenTy &ElseGen) {
2003   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2004 
2005   // If the condition constant folds and can be elided, try to avoid emitting
2006   // the condition and the dead arm of the if/else.
2007   bool CondConstant;
2008   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2009     if (CondConstant)
2010       ThenGen(CGF);
2011     else
2012       ElseGen(CGF);
2013     return;
2014   }
2015 
2016   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2017   // emit the conditional branch.
2018   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2019   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2020   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2021   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2022 
2023   // Emit the 'then' code.
2024   CGF.EmitBlock(ThenBlock);
2025   ThenGen(CGF);
2026   CGF.EmitBranch(ContBlock);
2027   // Emit the 'else' code if present.
2028   // There is no need to emit line number for unconditional branch.
2029   (void)ApplyDebugLocation::CreateEmpty(CGF);
2030   CGF.EmitBlock(ElseBlock);
2031   ElseGen(CGF);
2032   // There is no need to emit line number for unconditional branch.
2033   (void)ApplyDebugLocation::CreateEmpty(CGF);
2034   CGF.EmitBranch(ContBlock);
2035   // Emit the continuation block for code after the if.
2036   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2037 }
2038 
/// Emit the code for an OpenMP 'parallel' region.
/// \param OutlinedFn the outlined function holding the region body.
/// \param CapturedVars values captured by the region, forwarded to
///        \p OutlinedFn after the runtime-mandated leading arguments.
/// \param IfCond if non-null, the region is guarded: true forks a team,
///        false runs the region serialized on the encountering thread.
/// \param NumThreads unused in this implementation — presumably consumed by
///        subclass overrides or clause handling elsewhere; TODO confirm.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  // Bail out if codegen has already terminated the current block.
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // 'then' branch: spawn a team via the fork entry point.
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // 'else' branch: execute the outlined function on this thread, bracketed by
  // the serialized-parallel runtime calls.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    //       handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  // With no if clause the 'then' path is emitted unconditionally.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2110 
// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed as the first argument of the outlined
// function, "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
// region but in a regular serial code region, get the thread ID by calling
// "kmp_int32 __kmpc_global_thread_num(ident_t *loc)", stash that thread ID in
// a temporary, and return the address of that temporary.
2117 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2118                                              SourceLocation Loc) {
2119   if (auto *OMPRegionInfo =
2120           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2121     if (OMPRegionInfo->getThreadIDVariable())
2122       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2123 
2124   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2125   QualType Int32Ty =
2126       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2127   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2128   CGF.EmitStoreOfScalar(ThreadID,
2129                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2130 
2131   return ThreadIDTemp;
2132 }
2133 
2134 llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable(
2135     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2136   SmallString<256> Buffer;
2137   llvm::raw_svector_ostream Out(Buffer);
2138   Out << Name;
2139   StringRef RuntimeName = Out.str();
2140   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2141   if (Elem.second) {
2142     assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) &&
2143            "OMP internal variable has different type than requested");
2144     return &*Elem.second;
2145   }
2146 
2147   return Elem.second = new llvm::GlobalVariable(
2148              CGM.getModule(), Ty, /*IsConstant*/ false,
2149              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2150              Elem.first(), /*InsertBefore=*/nullptr,
2151              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2152 }
2153 
2154 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2155   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2156   std::string Name = getName({Prefix, "var"});
2157   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2158 }
2159 
namespace {
/// Common pre(post)-action for different OpenMP constructs.
/// Emits the runtime "enter" call before the region body and the "exit" call
/// after it. With \p Conditional set, the enter call's result guards the body
/// ("if (__kmpc_xxx(...)) { ... }"); callers must then also call Done() after
/// the region to close the conditional.
/// NOTE: only ArrayRefs to the argument lists are stored — the underlying
/// arrays must outlive this object.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  // Join block created by Enter() in conditional mode; consumed by Done().
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      // Branch into the body only when the runtime call returned non-zero.
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  // Close the conditional opened by Enter(). Only meaningful when
  // Conditional==true (otherwise ContBlock is null) — callers pair Done()
  // with conditional actions.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
2198 
2199 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2200                                          StringRef CriticalName,
2201                                          const RegionCodeGenTy &CriticalOpGen,
2202                                          SourceLocation Loc, const Expr *Hint) {
2203   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2204   // CriticalOpGen();
2205   // __kmpc_end_critical(ident_t *, gtid, Lock);
2206   // Prepare arguments and build a call to __kmpc_critical
2207   if (!CGF.HaveInsertPoint())
2208     return;
2209   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2210                          getCriticalRegionLock(CriticalName)};
2211   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2212                                                 std::end(Args));
2213   if (Hint) {
2214     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2215         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2216   }
2217   CommonActionTy Action(
2218       OMPBuilder.getOrCreateRuntimeFunction(
2219           CGM.getModule(),
2220           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2221       EnterArgs,
2222       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2223                                             OMPRTL___kmpc_end_critical),
2224       Args);
2225   CriticalOpGen.setAction(Action);
2226   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2227 }
2228 
2229 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2230                                        const RegionCodeGenTy &MasterOpGen,
2231                                        SourceLocation Loc) {
2232   if (!CGF.HaveInsertPoint())
2233     return;
2234   // if(__kmpc_master(ident_t *, gtid)) {
2235   //   MasterOpGen();
2236   //   __kmpc_end_master(ident_t *, gtid);
2237   // }
2238   // Prepare arguments and build a call to __kmpc_master
2239   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2240   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2241                             CGM.getModule(), OMPRTL___kmpc_master),
2242                         Args,
2243                         OMPBuilder.getOrCreateRuntimeFunction(
2244                             CGM.getModule(), OMPRTL___kmpc_end_master),
2245                         Args,
2246                         /*Conditional=*/true);
2247   MasterOpGen.setAction(Action);
2248   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2249   Action.Done(CGF);
2250 }
2251 
2252 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2253                                        const RegionCodeGenTy &MaskedOpGen,
2254                                        SourceLocation Loc, const Expr *Filter) {
2255   if (!CGF.HaveInsertPoint())
2256     return;
2257   // if(__kmpc_masked(ident_t *, gtid, filter)) {
2258   //   MaskedOpGen();
2259   //   __kmpc_end_masked(iden_t *, gtid);
2260   // }
2261   // Prepare arguments and build a call to __kmpc_masked
2262   llvm::Value *FilterVal = Filter
2263                                ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2264                                : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2265   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2266                          FilterVal};
2267   llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2268                             getThreadID(CGF, Loc)};
2269   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2270                             CGM.getModule(), OMPRTL___kmpc_masked),
2271                         Args,
2272                         OMPBuilder.getOrCreateRuntimeFunction(
2273                             CGM.getModule(), OMPRTL___kmpc_end_masked),
2274                         ArgsEnd,
2275                         /*Conditional=*/true);
2276   MaskedOpGen.setAction(Action);
2277   emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2278   Action.Done(CGF);
2279 }
2280 
2281 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2282                                         SourceLocation Loc) {
2283   if (!CGF.HaveInsertPoint())
2284     return;
2285   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2286     OMPBuilder.createTaskyield(CGF.Builder);
2287   } else {
2288     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2289     llvm::Value *Args[] = {
2290         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2291         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2292     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2293                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2294                         Args);
2295   }
2296 
2297   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2298     Region->emitUntiedSwitch(CGF);
2299 }
2300 
2301 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2302                                           const RegionCodeGenTy &TaskgroupOpGen,
2303                                           SourceLocation Loc) {
2304   if (!CGF.HaveInsertPoint())
2305     return;
2306   // __kmpc_taskgroup(ident_t *, gtid);
2307   // TaskgroupOpGen();
2308   // __kmpc_end_taskgroup(ident_t *, gtid);
2309   // Prepare arguments and build a call to __kmpc_taskgroup
2310   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2311   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2312                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2313                         Args,
2314                         OMPBuilder.getOrCreateRuntimeFunction(
2315                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2316                         Args);
2317   TaskgroupOpGen.setAction(Action);
2318   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2319 }
2320 
2321 /// Given an array of pointers to variables, project the address of a
2322 /// given variable.
2323 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2324                                       unsigned Index, const VarDecl *Var) {
2325   // Pull out the pointer to the variable.
2326   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2327   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2328 
2329   llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2330   return Address(
2331       CGF.Builder.CreateBitCast(
2332           Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
2333       ElemTy, CGF.getContext().getDeclAlign(Var));
2334 }
2335 
/// Build the helper "void .omp.copyprivate.copy_func(void *LHS, void *RHS)"
/// used by __kmpc_copyprivate: both arguments are arrays (of element type
/// \p ArgsElemType) of pointers to the copyprivate variables, and the helper
/// performs the per-variable assignment \p AssignmentOps[I] from the RHS slot
/// into the LHS slot.
/// NOTE(review): the caller in emitSingleRegion passes its SrcExprs for
/// \p DestExprs and its DstExprs for \p SrcExprs — confirm the intended
/// direction against the caller before renaming anything.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  // Create the internal-linkage helper function itself.
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  // Emit the helper's body with a fresh CodeGenFunction.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // Perform the copy with the type of the underlying copyprivate variable.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2391 
2392 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2393                                        const RegionCodeGenTy &SingleOpGen,
2394                                        SourceLocation Loc,
2395                                        ArrayRef<const Expr *> CopyprivateVars,
2396                                        ArrayRef<const Expr *> SrcExprs,
2397                                        ArrayRef<const Expr *> DstExprs,
2398                                        ArrayRef<const Expr *> AssignmentOps) {
2399   if (!CGF.HaveInsertPoint())
2400     return;
2401   assert(CopyprivateVars.size() == SrcExprs.size() &&
2402          CopyprivateVars.size() == DstExprs.size() &&
2403          CopyprivateVars.size() == AssignmentOps.size());
2404   ASTContext &C = CGM.getContext();
2405   // int32 did_it = 0;
2406   // if(__kmpc_single(ident_t *, gtid)) {
2407   //   SingleOpGen();
2408   //   __kmpc_end_single(ident_t *, gtid);
2409   //   did_it = 1;
2410   // }
2411   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2412   // <copy_func>, did_it);
2413 
2414   Address DidIt = Address::invalid();
2415   if (!CopyprivateVars.empty()) {
2416     // int32 did_it = 0;
2417     QualType KmpInt32Ty =
2418         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2419     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2420     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2421   }
2422   // Prepare arguments and build a call to __kmpc_single
2423   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2424   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2425                             CGM.getModule(), OMPRTL___kmpc_single),
2426                         Args,
2427                         OMPBuilder.getOrCreateRuntimeFunction(
2428                             CGM.getModule(), OMPRTL___kmpc_end_single),
2429                         Args,
2430                         /*Conditional=*/true);
2431   SingleOpGen.setAction(Action);
2432   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2433   if (DidIt.isValid()) {
2434     // did_it = 1;
2435     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2436   }
2437   Action.Done(CGF);
2438   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2439   // <copy_func>, did_it);
2440   if (DidIt.isValid()) {
2441     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2442     QualType CopyprivateArrayTy = C.getConstantArrayType(
2443         C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2444         /*IndexTypeQuals=*/0);
2445     // Create a list of all private variables for copyprivate.
2446     Address CopyprivateList =
2447         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2448     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2449       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2450       CGF.Builder.CreateStore(
2451           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2452               CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2453               CGF.VoidPtrTy),
2454           Elem);
2455     }
2456     // Build function that copies private values from single region to all other
2457     // threads in the corresponding parallel region.
2458     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2459         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2460         SrcExprs, DstExprs, AssignmentOps, Loc);
2461     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2462     Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2463         CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2464     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2465     llvm::Value *Args[] = {
2466         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2467         getThreadID(CGF, Loc),        // i32 <gtid>
2468         BufSize,                      // size_t <buf_size>
2469         CL.getPointer(),              // void *<copyprivate list>
2470         CpyFn,                        // void (*) (void *, void *) <copy_func>
2471         DidItVal                      // i32 did_it
2472     };
2473     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2474                             CGM.getModule(), OMPRTL___kmpc_copyprivate),
2475                         Args);
2476   }
2477 }
2478 
2479 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2480                                         const RegionCodeGenTy &OrderedOpGen,
2481                                         SourceLocation Loc, bool IsThreads) {
2482   if (!CGF.HaveInsertPoint())
2483     return;
2484   // __kmpc_ordered(ident_t *, gtid);
2485   // OrderedOpGen();
2486   // __kmpc_end_ordered(ident_t *, gtid);
2487   // Prepare arguments and build a call to __kmpc_ordered
2488   if (IsThreads) {
2489     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2490     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2491                               CGM.getModule(), OMPRTL___kmpc_ordered),
2492                           Args,
2493                           OMPBuilder.getOrCreateRuntimeFunction(
2494                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2495                           Args);
2496     OrderedOpGen.setAction(Action);
2497     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2498     return;
2499   }
2500   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2501 }
2502 
2503 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2504   unsigned Flags;
2505   if (Kind == OMPD_for)
2506     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2507   else if (Kind == OMPD_sections)
2508     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2509   else if (Kind == OMPD_single)
2510     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2511   else if (Kind == OMPD_barrier)
2512     Flags = OMP_IDENT_BARRIER_EXPL;
2513   else
2514     Flags = OMP_IDENT_BARRIER_IMPL;
2515   return Flags;
2516 }
2517 
2518 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2519     CodeGenFunction &CGF, const OMPLoopDirective &S,
2520     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2521   // Check if the loop directive is actually a doacross loop directive. In this
2522   // case choose static, 1 schedule.
2523   if (llvm::any_of(
2524           S.getClausesOfKind<OMPOrderedClause>(),
2525           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2526     ScheduleKind = OMPC_SCHEDULE_static;
2527     // Chunk size is 1 in this case.
2528     llvm::APInt ChunkSize(32, 1);
2529     ChunkExpr = IntegerLiteral::Create(
2530         CGF.getContext(), ChunkSize,
2531         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2532         SourceLocation());
2533   }
2534 }
2535 
2536 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2537                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2538                                       bool ForceSimpleCall) {
2539   // Check if we should use the OMPBuilder
2540   auto *OMPRegionInfo =
2541       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2542   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2543     CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2544         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2545     return;
2546   }
2547 
2548   if (!CGF.HaveInsertPoint())
2549     return;
2550   // Build call __kmpc_cancel_barrier(loc, thread_id);
2551   // Build call __kmpc_barrier(loc, thread_id);
2552   unsigned Flags = getDefaultFlagsForBarriers(Kind);
2553   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2554   // thread_id);
2555   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2556                          getThreadID(CGF, Loc)};
2557   if (OMPRegionInfo) {
2558     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2559       llvm::Value *Result = CGF.EmitRuntimeCall(
2560           OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2561                                                 OMPRTL___kmpc_cancel_barrier),
2562           Args);
2563       if (EmitChecks) {
2564         // if (__kmpc_cancel_barrier()) {
2565         //   exit from construct;
2566         // }
2567         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2568         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2569         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2570         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2571         CGF.EmitBlock(ExitBB);
2572         //   exit from construct;
2573         CodeGenFunction::JumpDest CancelDestination =
2574             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2575         CGF.EmitBranchThroughCleanup(CancelDestination);
2576         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2577       }
2578       return;
2579     }
2580   }
2581   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2582                           CGM.getModule(), OMPRTL___kmpc_barrier),
2583                       Args);
2584 }
2585 
2586 /// Map the OpenMP loop schedule to the runtime enumeration.
2587 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2588                                           bool Chunked, bool Ordered) {
2589   switch (ScheduleKind) {
2590   case OMPC_SCHEDULE_static:
2591     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2592                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2593   case OMPC_SCHEDULE_dynamic:
2594     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2595   case OMPC_SCHEDULE_guided:
2596     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2597   case OMPC_SCHEDULE_runtime:
2598     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2599   case OMPC_SCHEDULE_auto:
2600     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2601   case OMPC_SCHEDULE_unknown:
2602     assert(!Chunked && "chunk was specified but schedule kind not known");
2603     return Ordered ? OMP_ord_static : OMP_sch_static;
2604   }
2605   llvm_unreachable("Unexpected runtime schedule");
2606 }
2607 
2608 /// Map the OpenMP distribute schedule to the runtime enumeration.
2609 static OpenMPSchedType
2610 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2611   // only static is allowed for dist_schedule
2612   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2613 }
2614 
2615 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2616                                          bool Chunked) const {
2617   OpenMPSchedType Schedule =
2618       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2619   return Schedule == OMP_sch_static;
2620 }
2621 
2622 bool CGOpenMPRuntime::isStaticNonchunked(
2623     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2624   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2625   return Schedule == OMP_dist_sch_static;
2626 }
2627 
2628 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2629                                       bool Chunked) const {
2630   OpenMPSchedType Schedule =
2631       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2632   return Schedule == OMP_sch_static_chunked;
2633 }
2634 
2635 bool CGOpenMPRuntime::isStaticChunked(
2636     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2637   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2638   return Schedule == OMP_dist_sch_static_chunked;
2639 }
2640 
2641 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2642   OpenMPSchedType Schedule =
2643       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2644   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2645   return Schedule != OMP_sch_static;
2646 }
2647 
2648 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2649                                   OpenMPScheduleClauseModifier M1,
2650                                   OpenMPScheduleClauseModifier M2) {
2651   int Modifier = 0;
2652   switch (M1) {
2653   case OMPC_SCHEDULE_MODIFIER_monotonic:
2654     Modifier = OMP_sch_modifier_monotonic;
2655     break;
2656   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2657     Modifier = OMP_sch_modifier_nonmonotonic;
2658     break;
2659   case OMPC_SCHEDULE_MODIFIER_simd:
2660     if (Schedule == OMP_sch_static_chunked)
2661       Schedule = OMP_sch_static_balanced_chunked;
2662     break;
2663   case OMPC_SCHEDULE_MODIFIER_last:
2664   case OMPC_SCHEDULE_MODIFIER_unknown:
2665     break;
2666   }
2667   switch (M2) {
2668   case OMPC_SCHEDULE_MODIFIER_monotonic:
2669     Modifier = OMP_sch_modifier_monotonic;
2670     break;
2671   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2672     Modifier = OMP_sch_modifier_nonmonotonic;
2673     break;
2674   case OMPC_SCHEDULE_MODIFIER_simd:
2675     if (Schedule == OMP_sch_static_chunked)
2676       Schedule = OMP_sch_static_balanced_chunked;
2677     break;
2678   case OMPC_SCHEDULE_MODIFIER_last:
2679   case OMPC_SCHEDULE_MODIFIER_unknown:
2680     break;
2681   }
2682   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2683   // If the static schedule kind is specified or if the ordered clause is
2684   // specified, and if the nonmonotonic modifier is not specified, the effect is
2685   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2686   // modifier is specified, the effect is as if the nonmonotonic modifier is
2687   // specified.
2688   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2689     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2690           Schedule == OMP_sch_static_balanced_chunked ||
2691           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2692           Schedule == OMP_dist_sch_static_chunked ||
2693           Schedule == OMP_dist_sch_static))
2694       Modifier = OMP_sch_modifier_nonmonotonic;
2695   }
2696   return Schedule | Modifier;
2697 }
2698 
/// Emit the __kmpc_dispatch_init call that starts a dynamically scheduled
/// worksharing loop.
/// \param IVSize bit width (32 or 64) of the loop iteration variable.
/// \param IVSigned whether the iteration variable is signed.
/// \param Ordered whether the loop has an ordered clause.
/// \param DispatchValues bounds/chunk values forwarded to the runtime.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Static schedules must not come through the dispatch path unless ordered.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
2731 
// Shared helper that emits the __kmpc_for_static_init call for both
// worksharing-loop/sections and distribute constructs. Only static,
// non-ordered schedule kinds are valid here.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    // Only non-chunked static kinds may omit the chunk expression.
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
2780 
2781 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2782                                         SourceLocation Loc,
2783                                         OpenMPDirectiveKind DKind,
2784                                         const OpenMPScheduleTy &ScheduleKind,
2785                                         const StaticRTInput &Values) {
2786   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2787       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2788   assert(isOpenMPWorksharingDirective(DKind) &&
2789          "Expected loop-based or sections-based directive.");
2790   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2791                                              isOpenMPLoopDirective(DKind)
2792                                                  ? OMP_IDENT_WORK_LOOP
2793                                                  : OMP_IDENT_WORK_SECTIONS);
2794   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2795   llvm::FunctionCallee StaticInitFunction =
2796       createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
2797   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2798   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2799                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2800 }
2801 
2802 void CGOpenMPRuntime::emitDistributeStaticInit(
2803     CodeGenFunction &CGF, SourceLocation Loc,
2804     OpenMPDistScheduleClauseKind SchedKind,
2805     const CGOpenMPRuntime::StaticRTInput &Values) {
2806   OpenMPSchedType ScheduleNum =
2807       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2808   llvm::Value *UpdatedLocation =
2809       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2810   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2811   llvm::FunctionCallee StaticInitFunction;
2812   bool isGPUDistribute =
2813       CGM.getLangOpts().OpenMPIsDevice &&
2814       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2815   StaticInitFunction = createForStaticInitFunction(
2816       Values.IVSize, Values.IVSigned, isGPUDistribute);
2817 
2818   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2819                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2820                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2821 }
2822 
2823 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2824                                           SourceLocation Loc,
2825                                           OpenMPDirectiveKind DKind) {
2826   if (!CGF.HaveInsertPoint())
2827     return;
2828   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2829   llvm::Value *Args[] = {
2830       emitUpdateLocation(CGF, Loc,
2831                          isOpenMPDistributeDirective(DKind)
2832                              ? OMP_IDENT_WORK_DISTRIBUTE
2833                              : isOpenMPLoopDirective(DKind)
2834                                    ? OMP_IDENT_WORK_LOOP
2835                                    : OMP_IDENT_WORK_SECTIONS),
2836       getThreadID(CGF, Loc)};
2837   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2838   if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2839       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2840     CGF.EmitRuntimeCall(
2841         OMPBuilder.getOrCreateRuntimeFunction(
2842             CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2843         Args);
2844   else
2845     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2846                             CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2847                         Args);
2848 }
2849 
2850 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2851                                                  SourceLocation Loc,
2852                                                  unsigned IVSize,
2853                                                  bool IVSigned) {
2854   if (!CGF.HaveInsertPoint())
2855     return;
2856   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2857   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2858   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2859 }
2860 
2861 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2862                                           SourceLocation Loc, unsigned IVSize,
2863                                           bool IVSigned, Address IL,
2864                                           Address LB, Address UB,
2865                                           Address ST) {
2866   // Call __kmpc_dispatch_next(
2867   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2868   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2869   //          kmp_int[32|64] *p_stride);
2870   llvm::Value *Args[] = {
2871       emitUpdateLocation(CGF, Loc),
2872       getThreadID(CGF, Loc),
2873       IL.getPointer(), // &isLastIter
2874       LB.getPointer(), // &Lower
2875       UB.getPointer(), // &Upper
2876       ST.getPointer()  // &Stride
2877   };
2878   llvm::Value *Call =
2879       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2880   return CGF.EmitScalarConversion(
2881       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2882       CGF.getContext().BoolTy, Loc);
2883 }
2884 
2885 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2886                                            llvm::Value *NumThreads,
2887                                            SourceLocation Loc) {
2888   if (!CGF.HaveInsertPoint())
2889     return;
2890   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2891   llvm::Value *Args[] = {
2892       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2893       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2894   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2895                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2896                       Args);
2897 }
2898 
2899 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2900                                          ProcBindKind ProcBind,
2901                                          SourceLocation Loc) {
2902   if (!CGF.HaveInsertPoint())
2903     return;
2904   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2905   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2906   llvm::Value *Args[] = {
2907       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2908       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2909   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2910                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2911                       Args);
2912 }
2913 
2914 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2915                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2916   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2917     OMPBuilder.createFlush(CGF.Builder);
2918   } else {
2919     if (!CGF.HaveInsertPoint())
2920       return;
2921     // Build call void __kmpc_flush(ident_t *loc)
2922     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2923                             CGM.getModule(), OMPRTL___kmpc_flush),
2924                         emitUpdateLocation(CGF, Loc));
2925   }
2926 }
2927 
namespace {
/// Indexes of fields for type kmp_task_t. The order must match the field
/// order built by createKmpTaskTRecordDecl below.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables
  /// (kmp_cmplrdata_t union).
  Data1,
  /// Task priority (kmp_cmplrdata_t union).
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2953 
2954 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2955   return OffloadEntriesTargetRegion.empty() &&
2956          OffloadEntriesDeviceGlobalVar.empty();
2957 }
2958 
2959 /// Initialize target region entry.
2960 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2961     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2962                                     StringRef ParentName, unsigned LineNum,
2963                                     unsigned Order) {
2964   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2965                                              "only required for the device "
2966                                              "code generation.");
2967   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2968       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2969                                    OMPTargetRegionEntryTargetRegion);
2970   ++OffloadingEntriesNum;
2971 }
2972 
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
      return;
    // Fill in the address/ID/flags of the pre-initialized placeholder entry.
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    // Host path: registering a plain target-region entry that already exists
    // at the same location is silently ignored; any other duplicate is a bug.
    if (Flags ==
            OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
        hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
                                 /*IgnoreAddressId*/ true))
      return;
    assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
           "Target region entry already registered!");
    // New entries claim the next ordinal so host and device numbering match.
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}
3002 
3003 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3004     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3005     bool IgnoreAddressId) const {
3006   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3007   if (PerDevice == OffloadEntriesTargetRegion.end())
3008     return false;
3009   auto PerFile = PerDevice->second.find(FileID);
3010   if (PerFile == PerDevice->second.end())
3011     return false;
3012   auto PerParentName = PerFile->second.find(ParentName);
3013   if (PerParentName == PerFile->second.end())
3014     return false;
3015   auto PerLine = PerParentName->second.find(LineNum);
3016   if (PerLine == PerParentName->second.end())
3017     return false;
3018   // Fail if this entry is already registered.
3019   if (!IgnoreAddressId &&
3020       (PerLine->second.getAddress() || PerLine->second.getID()))
3021     return false;
3022   return true;
3023 }
3024 
3025 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3026     const OffloadTargetRegionEntryInfoActTy &Action) {
3027   // Scan all target region entries and perform the provided action.
3028   for (const auto &D : OffloadEntriesTargetRegion)
3029     for (const auto &F : D.second)
3030       for (const auto &P : F.second)
3031         for (const auto &L : P.second)
3032           Action(D.first, F.first, P.first(), L.first, L.second);
3033 }
3034 
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  // Create a placeholder entry (no address/size yet) keyed by mangled name;
  // registerDeviceGlobalVarEntryInfo fills in the rest later.
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}
3045 
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasDeviceGlobalVarEntryInfo(VarName))
      return;
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    // NOTE(review): the second conjunct below is redundant - the early return
    // above already established hasDeviceGlobalVarEntryInfo(VarName).
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      // The entry already has an address: only fill in size/linkage if they
      // were not known yet.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    // Host path: update an initialized entry in place, otherwise create a new
    // one claiming the next ordinal.
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}
3082 
3083 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3084     actOnDeviceGlobalVarEntriesInfo(
3085         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3086   // Scan all target region entries and perform the provided action.
3087   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3088     Action(E.getKey(), E.getValue());
3089 }
3090 
void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  // Delegate to the OpenMPIRBuilder, which emits the offloading entry
  // descriptor. The entry name is taken from Addr; the Linkage parameter is
  // currently unused here.
  OMPBuilder.emitOffloadingEntry(ID, Addr->getName(), Size, Flags);
}
3096 
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for function that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries are collected here indexed by their creation order so that host
  // and device sides agree on numbering.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry.
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Recover a SourceLocation for diagnostics by matching the entry's
        // device/file IDs against the files known to the SourceManager.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry.
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Walk the entries in creation order and materialize the actual offload
  // entries, diagnosing inconsistent ones.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // Under unified shared memory on the device, 'to' variables need no
        // entry.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }

      // Hidden or internal symbols on the device are not externally visible. We
      // should not attempt to register them by creating an offloading entry.
      if (auto *GV = dyn_cast<llvm::GlobalValue>(CE->getAddress()))
        if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
          continue;

      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3277 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  // Only device compilations consume the host's metadata.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a throwaway context; only the omp_offload.info
  // named metadata is needed.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers decoding the operands of a single metadata node.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 encodes the entry kind; the remaining operand layout depends
    // on it (see createOffloadEntriesAndInfoMetadata()).
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
3346 
3347 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3348   if (!KmpRoutineEntryPtrTy) {
3349     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3350     ASTContext &C = CGM.getContext();
3351     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3352     FunctionProtoType::ExtProtoInfo EPI;
3353     KmpRoutineEntryPtrQTy = C.getPointerType(
3354         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3355     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3356   }
3357 }
3358 
namespace {
/// Bundles the declarations needed to emit privatization code for a single
/// variable in a task/taskloop region.
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  /// Constructor for a local private variable (no copy/init helper decls).
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  // Reference expression to the original variable, if any.
  const Expr *OriginalRef = nullptr;
  // The variable being privatized.
  const VarDecl *Original = nullptr;
  // Declaration used as the private copy of the variable.
  const VarDecl *PrivateCopy = nullptr;
  // Declaration used to initialize elements of the private copy.
  const VarDecl *PrivateElemInit = nullptr;
  /// True when only Original is set, i.e. this describes a local private
  /// variable rather than a privatized captured one.
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
/// Required alignment of the private data paired with its helper info.
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace
3376 
3377 static bool isAllocatableDecl(const VarDecl *VD) {
3378   const VarDecl *CVD = VD->getCanonicalDecl();
3379   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3380     return false;
3381   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3382   // Use the default allocation.
3383   return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
3384            !AA->getAllocator());
3385 }
3386 
3387 static RecordDecl *
3388 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3389   if (!Privates.empty()) {
3390     ASTContext &C = CGM.getContext();
3391     // Build struct .kmp_privates_t. {
3392     //         /*  private vars  */
3393     //       };
3394     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3395     RD->startDefinition();
3396     for (const auto &Pair : Privates) {
3397       const VarDecl *VD = Pair.second.Original;
3398       QualType Type = VD->getType().getNonReferenceType();
3399       // If the private variable is a local variable with lvalue ref type,
3400       // allocate the pointer instead of the pointee type.
3401       if (Pair.second.isLocalPrivate()) {
3402         if (VD->getType()->isLValueReferenceType())
3403           Type = C.getPointerType(Type);
3404         if (isAllocatableDecl(VD))
3405           Type = C.getPointerType(Type);
3406       }
3407       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3408       if (VD->hasAttrs()) {
3409         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3410              E(VD->getAttrs().end());
3411              I != E; ++I)
3412           FD->addAttr(*I);
3413       }
3414     }
3415     RD->completeDefinition();
3416     return RD;
3417   }
3418   return nullptr;
3419 }
3420 
3421 static RecordDecl *
3422 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3423                          QualType KmpInt32Ty,
3424                          QualType KmpRoutineEntryPointerQTy) {
3425   ASTContext &C = CGM.getContext();
3426   // Build struct kmp_task_t {
3427   //         void *              shareds;
3428   //         kmp_routine_entry_t routine;
3429   //         kmp_int32           part_id;
3430   //         kmp_cmplrdata_t data1;
3431   //         kmp_cmplrdata_t data2;
3432   // For taskloops additional fields:
3433   //         kmp_uint64          lb;
3434   //         kmp_uint64          ub;
3435   //         kmp_int64           st;
3436   //         kmp_int32           liter;
3437   //         void *              reductions;
3438   //       };
3439   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3440   UD->startDefinition();
3441   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3442   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3443   UD->completeDefinition();
3444   QualType KmpCmplrdataTy = C.getRecordType(UD);
3445   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3446   RD->startDefinition();
3447   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3448   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3449   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3450   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3451   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3452   if (isOpenMPTaskLoopDirective(Kind)) {
3453     QualType KmpUInt64Ty =
3454         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3455     QualType KmpInt64Ty =
3456         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3457     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3458     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3459     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3460     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3461     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3462   }
3463   RD->completeDefinition();
3464   return RD;
3465 }
3466 
3467 static RecordDecl *
3468 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3469                                      ArrayRef<PrivateDataTy> Privates) {
3470   ASTContext &C = CGM.getContext();
3471   // Build struct kmp_task_t_with_privates {
3472   //         kmp_task_t task_data;
3473   //         .kmp_privates_t. privates;
3474   //       };
3475   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3476   RD->startDefinition();
3477   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3478   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3479     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3480   RD->completeDefinition();
3481   return RD;
3482 }
3483 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
/// \param TaskFunction The outlined task body invoked by the proxy.
/// \param TaskPrivatesMap Pointer to the privates mapping function (or a null
/// pointer constant when there are no privates).
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Signature: kmp_int32 (kmp_int32 gtid, kmp_task_t_with_privates *tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // Dereference the kmp_task_t_with_privates * argument.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // First field of kmp_task_t_with_privates is the kmp_task_t task data.
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Pass the address of part_id (not its value) to the outlined function.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load tt->shareds and cast it to the expected shareds pointer type.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // &tt->privates if the record has a second (privates) field, null otherwise.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Arguments common to tasks and taskloops: gtid, &part_id, privates,
  // privates map function, and the task descriptor itself as void*.
  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
                                               CGF.VoidPtrTy, CGF.Int8Ty)
          .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloops additionally receive lb, ub, st, liter and the reductions
  // pointer, all loaded from the kmp_task_t fields.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0 (see the \code block above).
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3598 
/// Emit a function that destroys the fields of the privates record embedded
/// in a kmp_task_t_with_privates instance. It takes (gtid, task) like the
/// task entry proxy and pushes a destroy cleanup for every privates field
/// whose type requires destruction.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  // Signature: kmp_int32 (kmp_int32 gtid, kmp_task_t_with_privates *tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  // Dereference the task argument and step to its second field, the privates
  // record.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Register a destroy cleanup for every field that needs destruction; the
  // cleanups run when the function finishes.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3647 
3648 /// Emit a privates mapping function for correct handling of private and
3649 /// firstprivate variables.
3650 /// \code
3651 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3652 /// **noalias priv1,...,  <tyn> **noalias privn) {
3653 ///   *priv1 = &.privates.priv1;
3654 ///   ...;
3655 ///   *privn = &.privates.privn;
3656 /// }
3657 /// \endcode
3658 static llvm::Value *
3659 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3660                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3661                                ArrayRef<PrivateDataTy> Privates) {
3662   ASTContext &C = CGM.getContext();
3663   FunctionArgList Args;
3664   ImplicitParamDecl TaskPrivatesArg(
3665       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3666       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3667       ImplicitParamDecl::Other);
3668   Args.push_back(&TaskPrivatesArg);
3669   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3670   unsigned Counter = 1;
3671   for (const Expr *E : Data.PrivateVars) {
3672     Args.push_back(ImplicitParamDecl::Create(
3673         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3674         C.getPointerType(C.getPointerType(E->getType()))
3675             .withConst()
3676             .withRestrict(),
3677         ImplicitParamDecl::Other));
3678     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3679     PrivateVarsPos[VD] = Counter;
3680     ++Counter;
3681   }
3682   for (const Expr *E : Data.FirstprivateVars) {
3683     Args.push_back(ImplicitParamDecl::Create(
3684         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3685         C.getPointerType(C.getPointerType(E->getType()))
3686             .withConst()
3687             .withRestrict(),
3688         ImplicitParamDecl::Other));
3689     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3690     PrivateVarsPos[VD] = Counter;
3691     ++Counter;
3692   }
3693   for (const Expr *E : Data.LastprivateVars) {
3694     Args.push_back(ImplicitParamDecl::Create(
3695         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3696         C.getPointerType(C.getPointerType(E->getType()))
3697             .withConst()
3698             .withRestrict(),
3699         ImplicitParamDecl::Other));
3700     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3701     PrivateVarsPos[VD] = Counter;
3702     ++Counter;
3703   }
3704   for (const VarDecl *VD : Data.PrivateLocals) {
3705     QualType Ty = VD->getType().getNonReferenceType();
3706     if (VD->getType()->isLValueReferenceType())
3707       Ty = C.getPointerType(Ty);
3708     if (isAllocatableDecl(VD))
3709       Ty = C.getPointerType(Ty);
3710     Args.push_back(ImplicitParamDecl::Create(
3711         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3712         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3713         ImplicitParamDecl::Other));
3714     PrivateVarsPos[VD] = Counter;
3715     ++Counter;
3716   }
3717   const auto &TaskPrivatesMapFnInfo =
3718       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3719   llvm::FunctionType *TaskPrivatesMapTy =
3720       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3721   std::string Name =
3722       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3723   auto *TaskPrivatesMap = llvm::Function::Create(
3724       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3725       &CGM.getModule());
3726   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3727                                     TaskPrivatesMapFnInfo);
3728   if (CGM.getLangOpts().Optimize) {
3729     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3730     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3731     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3732   }
3733   CodeGenFunction CGF(CGM);
3734   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3735                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3736 
3737   // *privi = &.privates.privi;
3738   LValue Base = CGF.EmitLoadOfPointerLValue(
3739       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3740       TaskPrivatesArg.getType()->castAs<PointerType>());
3741   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3742   Counter = 0;
3743   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3744     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3745     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3746     LValue RefLVal =
3747         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3748     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3749         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3750     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3751     ++Counter;
3752   }
3753   CGF.FinishFunction();
3754   return TaskPrivatesMap;
3755 }
3756 
/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Address of the shareds block of the task; may be
/// Address::invalid() — it is only used when initialization from shareds is
/// required (see the condition below).
/// \param TDBase LValue of the kmp_task_t_with_privates instance.
/// \param KmpTaskTWithPrivatesQTyRD Record whose second field is the privates
/// record.
/// \param Privates Privatized-variable descriptors, in field order of the
/// privates record.
/// \param ForDup true when emitting inside the taskloop task_dup function,
/// false when emitting at the initial task allocation point.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // Second field of kmp_task_t_with_privates is the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    // SrcBase views the shareds block through the SharedsTy record layout.
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  // Walk the privates record fields in lockstep with the Privates list.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the task_dup path (ForDup) only non-trivial constructor-based
    // initializers need to be re-run.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        // Firstprivate: initialize the private copy from the shared original.
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the original value out of the source task's shareds block,
          // realigned to the declared alignment of the original variable.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress(CGF).withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          // Captured by an enclosing lambda or block: emit via the original
          // reference expression directly.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(Elem, SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/class firstprivate: privatize the init helper to alias the
          // shared value, then run the initializer into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private: just run the copy's initializer, if any.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3876 
3877 /// Check if duplication function is required for taskloops.
3878 static bool checkInitIsRequired(CodeGenFunction &CGF,
3879                                 ArrayRef<PrivateDataTy> Privates) {
3880   bool InitRequired = false;
3881   for (const PrivateDataTy &Pair : Privates) {
3882     if (Pair.second.isLocalPrivate())
3883       continue;
3884     const VarDecl *VD = Pair.second.PrivateCopy;
3885     const Expr *Init = VD->getAnyInitializer();
3886     InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3887                                     !CGF.isTrivialInitializer(Init));
3888     if (InitRequired)
3889       break;
3890   }
3891   return InitRequired;
3892 }
3893 
3894 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
/// \param WithLastIter true if the destination task's liter field must be set
/// from the lastpriv argument.
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Signature: void (kmp_task_t *dst, kmp_task_t *src, int lastpriv).
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  // Dereference the destination task argument.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  // The source task's shareds block is needed only when firstprivates are
  // initialized from shareds.
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
3973 
3974 /// Checks if destructor function is required to be generated.
3975 /// \return true if cleanups are required, false otherwise.
3976 static bool
3977 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3978                          ArrayRef<PrivateDataTy> Privates) {
3979   for (const PrivateDataTy &P : Privates) {
3980     if (P.second.isLocalPrivate())
3981       continue;
3982     QualType Ty = P.second.Original->getType().getNonReferenceType();
3983     if (Ty.isDestructedType())
3984       return true;
3985   }
3986   return false;
3987 }
3988 
3989 namespace {
3990 /// Loop generator for OpenMP iterator expression.
3991 class OMPIteratorGeneratorScope final
3992     : public CodeGenFunction::OMPPrivateScope {
3993   CodeGenFunction &CGF;
3994   const OMPIteratorExpr *E = nullptr;
3995   SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
3996   SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
3997   OMPIteratorGeneratorScope() = delete;
3998   OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3999 
4000 public:
  /// Emits the loop nest for the OpenMP iterator expression \p E and leaves
  /// the insertion point inside the innermost loop body.  The matching
  /// destructor emits the counter increments, the back-branches to the
  /// condition blocks and the exit blocks, so any code generated between
  /// construction and destruction of this scope runs once per point of the
  /// full iteration space.  A null \p E makes the scope a no-op.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      // Evaluate the upper bound before privatizing so it is computed in the
      // enclosing (non-private) scope.
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      // Each iterator also gets a private counter driving its loop.
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    // Emit one counter-driven loop per iterator, nested in declaration order.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      // Remember the continue/exit destinations; the destructor consumes them
      // in reverse order when it closes the nest.
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Use a signed or unsigned comparison depending on the counter type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Closes the loop nest opened by the constructor: for each iterator, from
  /// innermost to outermost, emits the counter increment, the branch back to
  /// the loop condition ("cont") block and the loop's exit block.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      // Only the outermost exit block (I == 1) terminates the whole nest.
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
4062 };
4063 } // namespace
4064 
4065 static std::pair<llvm::Value *, llvm::Value *>
4066 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4067   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4068   llvm::Value *Addr;
4069   if (OASE) {
4070     const Expr *Base = OASE->getBase();
4071     Addr = CGF.EmitScalarExpr(Base);
4072   } else {
4073     Addr = CGF.EmitLValue(E).getPointer(CGF);
4074   }
4075   llvm::Value *SizeVal;
4076   QualType Ty = E->getType();
4077   if (OASE) {
4078     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4079     for (const Expr *SE : OASE->getDimensions()) {
4080       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4081       Sz = CGF.EmitScalarConversion(
4082           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4083       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4084     }
4085   } else if (const auto *ASE =
4086                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4087     LValue UpAddrLVal =
4088         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4089     Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
4090     llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
4091         UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
4092     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4093     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4094     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4095   } else {
4096     SizeVal = CGF.getTypeSize(Ty);
4097   }
4098   return std::make_pair(Addr, SizeVal);
4099 }
4100 
4101 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4102 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4103   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4104   if (KmpTaskAffinityInfoTy.isNull()) {
4105     RecordDecl *KmpAffinityInfoRD =
4106         C.buildImplicitRecord("kmp_task_affinity_info_t");
4107     KmpAffinityInfoRD->startDefinition();
4108     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4109     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4110     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4111     KmpAffinityInfoRD->completeDefinition();
4112     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4113   }
4114 }
4115 
/// Emits the common initialization of an OpenMP task object: allocates the
/// task through the runtime (__kmpc_omp_task_alloc, or the target variant
/// when a nowait target directive is involved), copies the shared variables
/// into it, emits initializers for private/firstprivate/lastprivate copies,
/// and wires up destructors, priority, the detach event and affinity data as
/// requested by the directive's clauses.  Returns the pieces (new task
/// pointer, task entry, typed base lvalue, ...) the caller needs to actually
/// enqueue the task.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally record the variable holding the initializer.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Sort by decreasing alignment (presumably to minimize padding in the
  // generated privates record); stable_sort keeps declaration order for
  // entries with equal alignment.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).  The record layout depends on
  // the directive kind, so the taskloop and plain task variants are cached
  // separately.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit the function mapping the private copies to their offsets inside the
  // task (if there are any privates); otherwise pass a null map.
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    // Tell the runtime to run the destructor thunk if any private copy has a
    // non-trivial destructor.
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The final flag may be a runtime value (select) or a compile-time constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    // Store the returned event handle into the user's event variable.
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // Iterator-modified clauses contribute a runtime element count (product
    // of the iterator upper bounds); plain clauses a compile-time count.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized case: emit a VLA of affinity records.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Fixed-size case: a constant array suffices.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Iterator-modified clauses need a runtime position counter, seeded with
    // the number of statically-filled entries.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  // Re-type the runtime's opaque task pointer as the task-with-privates
  // record to address its fields below.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            CGF.EmitLValueForField(
                TDBase,
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloop needs a task-duplication callback when lastprivates or
    // privates requiring init are present.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4503 
namespace {
/// Dependence kind for RTL.
/// NOTE(review): the values are bit patterns, not sequential ids — they are
/// stored verbatim into the kmp_depend_info flags field below, so they
/// presumably must match the runtime's encoding (see kmp.h); confirm before
/// changing.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4,
  DepInOutSet = 0x8
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4515 
4516 /// Translates internal dependency kind into the runtime kind.
4517 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4518   RTLDependenceKindTy DepKind;
4519   switch (K) {
4520   case OMPC_DEPEND_in:
4521     DepKind = DepIn;
4522     break;
4523   // Out and InOut dependencies must use the same code.
4524   case OMPC_DEPEND_out:
4525   case OMPC_DEPEND_inout:
4526     DepKind = DepInOut;
4527     break;
4528   case OMPC_DEPEND_mutexinoutset:
4529     DepKind = DepMutexInOutSet;
4530     break;
4531   case OMPC_DEPEND_inoutset:
4532     DepKind = DepInOutSet;
4533     break;
4534   case OMPC_DEPEND_source:
4535   case OMPC_DEPEND_sink:
4536   case OMPC_DEPEND_depobj:
4537   case OMPC_DEPEND_unknown:
4538     llvm_unreachable("Unknown task dependence type");
4539   }
4540   return DepKind;
4541 }
4542 
4543 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4544 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4545                            QualType &FlagsTy) {
4546   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4547   if (KmpDependInfoTy.isNull()) {
4548     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4549     KmpDependInfoRD->startDefinition();
4550     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4551     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4552     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4553     KmpDependInfoRD->completeDefinition();
4554     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4555   }
4556 }
4557 
/// Returns the number of dependencies stored in the depobj \p DepobjLVal
/// together with an lvalue for the first kmp_depend_info element of its
/// array.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  // The depobj variable holds a pointer to the first real element of a
  // kmp_depend_info array.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.Builder.CreateElementBitCast(
          DepobjLVal.getAddress(CGF),
          CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
      KmpDependInfoPtrTy->castAs<PointerType>());
  // Step back one element: the array is preceded by a header element whose
  // base_addr field holds the number of dependencies.
  Address DepObjAddr = CGF.Builder.CreateGEP(
      Base.getAddress(CGF),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4583 
/// Fills kmp_depend_info entries for the dependencies described by \p Data
/// into \p DependenciesArray, starting at position \p Pos.  \p Pos is either
/// a compile-time index (unsigned*, advanced in place) or, when the count is
/// only known at runtime (iterator modifiers), an lvalue holding the current
/// index.  The whole fill runs inside the clause's iterator loop nest, if
/// any.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // Open the iterator loop nest (no-op when there is no iterator modifier);
  // everything below is emitted once per iteration.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Compile-time index: constant GEP into the array.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Runtime index: load it from the position lvalue.
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position: a plain increment for the compile-time case, an
    // emitted load/add/store for the runtime case.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4640 
4641 SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
4642     CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4643     const OMPTaskDataTy::DependData &Data) {
4644   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4645          "Expected depobj dependecy kind.");
4646   SmallVector<llvm::Value *, 4> Sizes;
4647   SmallVector<LValue, 4> SizeLVals;
4648   ASTContext &C = CGF.getContext();
4649   {
4650     OMPIteratorGeneratorScope IteratorScope(
4651         CGF, cast_or_null<OMPIteratorExpr>(
4652                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4653                                    : nullptr));
4654     for (const Expr *E : Data.DepExprs) {
4655       llvm::Value *NumDeps;
4656       LValue Base;
4657       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4658       std::tie(NumDeps, Base) =
4659           getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4660       LValue NumLVal = CGF.MakeAddrLValue(
4661           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4662           C.getUIntPtrType());
4663       CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4664                               NumLVal.getAddress(CGF));
4665       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4666       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4667       CGF.EmitStoreOfScalar(Add, NumLVal);
4668       SizeLVals.push_back(NumLVal);
4669     }
4670   }
4671   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4672     llvm::Value *Size =
4673         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4674     Sizes.push_back(Size);
4675   }
4676   return Sizes;
4677 }
4678 
/// Copies the kmp_depend_info entries of every depobj in \p Data into
/// \p DependenciesArray at the runtime position held in \p PosLVal, advancing
/// the position by the number of copied elements.  Runs inside the clause's
/// iterator loop nest, if any.
void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
                                         QualType &KmpDependInfoTy,
                                         LValue PosLVal,
                                         const OMPTaskDataTy::DependData &Data,
                                         Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());

      // memcopy dependency data.
      // Byte count = sizeof(kmp_depend_info) * NumDeps.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // Pos += NumDeps;  (element count, not byte size)
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4715 
/// Emits the kmp_depend_info array for a 'depend' clause on a task-generating
/// construct. Returns the total number of dependency records (as an i32) and
/// the address of the array (as void*). Returns {nullptr, invalid} if there
/// are no dependencies at all. The array size is a compile-time constant
/// unless depobj dependencies or iterator modifiers are present, in which
/// case a VLA sized at runtime is emitted.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Statically countable records: regular (non-depobj) deps without an
  // iterator modifier. depobj and iterator-based counts are runtime values.
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.

    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      // Each iterator contributes UpperBound * <deps per iteration> records.
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
            Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Total size is only known at runtime: sum the static count with the
    // dynamic depobj/iterator contributions and emit a VLA.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    // Bind the runtime element count to an opaque expression so it can serve
    // as the size expression of a variable array type.
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // Purely static count: emit a constant-sized local array.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // Pass 1: regular dependencies without iterators (positions known
  // statically).
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Pass 2: copy regular dependencies with iterators, tracking the position
  // in a runtime counter seeded with the static position.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Pass 3: copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
  return std::make_pair(NumOfElements, DependenciesArray);
}
4839 
/// Emits the dependency array for an 'omp depobj' construct. The array is
/// heap-allocated via __kmpc_alloc with one extra leading element that stores
/// the number of records (needed by depobj(x) update(in) and destroy); the
/// returned address points past that header element, at the first real
/// record.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Iterator modifier: the record count is the product of all iterator
    // upper bounds, computed at runtime.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the leading element that stores the record count.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Static count: size the array (plus the header element) at compile time.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, KmpDependInfoLlvmTy->getPointerTo());
  DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Fill the records starting at index 1 (index 0 holds the count). With an
  // iterator modifier the position must be a runtime counter; otherwise a
  // plain unsigned suffices.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return a void* pointing past the header element.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
      CGF.Int8Ty);
  return DependenciesArray;
}
4924 
/// Emits code for the 'destroy' clause of 'omp depobj': frees the dependency
/// array previously allocated by emitDepobjDependClause via __kmpc_free.
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  // The depobj variable holds a pointer into the allocation.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
      CGF.ConvertTypeForMem(KmpDependInfoTy));
  // Step back one element to the real allocation start: the stored pointer
  // points past the leading element that holds the record count (see
  // emitDepobjDependClause).
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}
4951 
/// Emits code for the 'update' clause of 'omp depobj': loops over every
/// kmp_depend_info record stored in the depobj and rewrites its 'flags' field
/// to \p NewDepKind.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  // Read the record count and base address stored in the depobj.
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI over the current element pointer; second incoming value is added
  // below once the loop latch is emitted.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Begin.withPointer(ElementPHI);
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
4998 
/// Emits the runtime call sequence for an 'omp task' directive: allocates and
/// initializes the task via emitTaskInit, emits the dependency array (if
/// any), and then either enqueues the task (__kmpc_omp_task[_with_deps]) or,
/// under a false 'if' clause, executes it immediately between
/// __kmpc_omp_task_begin_if0/__kmpc_omp_task_complete_if0 after waiting on
/// its dependencies.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  // DepTaskArgs is only filled in (and only read by ThenCodeGen) when there
  // are dependences.
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'Then' path: the task is actually deferred — enqueue it with the runtime.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks restart from part id 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task.
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  // DepWaitTaskArgs is only filled in (and only read by ElseCodeGen) when
  // there are dependences.
  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // 'Else' path (if-clause false): execute the task immediately on this
  // thread, still honoring its dependences.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5116 
/// Emits the runtime call sequence for an 'omp taskloop' directive: allocates
/// the task via emitTaskInit, initializes the task's lower/upper bound,
/// stride, and reductions fields, and calls __kmpc_taskloop.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // Unlike 'task', the 'if' clause is passed to the runtime as an int flag
  // rather than branching in the generated code.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the task's lower bound field from the directive's lower bound
  // variable.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the task's upper bound field.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the task's stride field.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // 'sched' argument encoding for __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
5202 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Guard against zero-length arrays: skip the body entirely when empty.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI over the current source element pointer; the latch incoming value is
  // added after the loop body is emitted.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent(
      RHSElementPHI, RHSAddr.getElementType(),
      RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // PHI over the current destination element pointer.
  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent(
      LHSElementPHI, LHSAddr.getElementType(),
      LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy: temporarily remap LHSVar/RHSVar to the current array elements
  // so the generated reduction operation works element-wise.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, LHSElementCurrent);
  Scope.addPrivate(RHSVar, RHSElementCurrent);
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5285 
5286 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5287 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5288 /// UDR combiner function.
5289 static void emitReductionCombiner(CodeGenFunction &CGF,
5290                                   const Expr *ReductionOp) {
5291   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5292     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5293       if (const auto *DRE =
5294               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5295         if (const auto *DRD =
5296                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5297           std::pair<llvm::Function *, llvm::Function *> Reduction =
5298               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5299           RValue Func = RValue::get(Reduction.first);
5300           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5301           CGF.EmitIgnoredExpr(ReductionOp);
5302           return;
5303         }
5304   CGF.EmitIgnoredExpr(ReductionOp);
5305 }
5306 
/// Emits the shared reduction function passed to __kmpc_reduce{_nowait}:
/// \code
/// void reduction_func(void *LHSArg, void *RHSArg) {
///   *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
/// }
/// \endcode
/// Both arguments are opaque pointers to arrays of void* holding the
/// addresses of the reduction items. For a variably modified private type,
/// the slot after the item holds its VLA size encoded as a pointer (see the
/// corresponding store in emitReduction), which is decoded here to emit the
/// VLA type before privatization.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Map every LHS/RHS reduction variable to the address stored in the
  // corresponding slot of the argument arrays. Note: Idx may run ahead of I
  // because variably modified items occupy an extra (size) slot.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      // Bind the VLA size expression to the decoded value so that
      // EmitVariablyModifiedType can compute the runtime array bounds.
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Emit the combiner for each reduction item; array-typed items get an
  // element-wise reduction loop.
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5396 
5397 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5398                                                   const Expr *ReductionOp,
5399                                                   const Expr *PrivateRef,
5400                                                   const DeclRefExpr *LHS,
5401                                                   const DeclRefExpr *RHS) {
5402   if (PrivateRef->getType()->isArrayType()) {
5403     // Emit reduction for array section.
5404     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5405     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5406     EmitOMPAggregateReduction(
5407         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5408         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5409           emitReductionCombiner(CGF, ReductionOp);
5410         });
5411   } else {
5412     // Emit reduction for array subscript or single variable.
5413     emitReductionCombiner(CGF, ReductionOp);
5414   }
5415 }
5416 
5417 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5418                                     ArrayRef<const Expr *> Privates,
5419                                     ArrayRef<const Expr *> LHSExprs,
5420                                     ArrayRef<const Expr *> RHSExprs,
5421                                     ArrayRef<const Expr *> ReductionOps,
5422                                     ReductionOptionsTy Options) {
5423   if (!CGF.HaveInsertPoint())
5424     return;
5425 
5426   bool WithNowait = Options.WithNowait;
5427   bool SimpleReduction = Options.SimpleReduction;
5428 
5429   // Next code should be emitted for reduction:
5430   //
5431   // static kmp_critical_name lock = { 0 };
5432   //
5433   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5434   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5435   //  ...
5436   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5437   //  *(Type<n>-1*)rhs[<n>-1]);
5438   // }
5439   //
5440   // ...
5441   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5442   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5443   // RedList, reduce_func, &<lock>)) {
5444   // case 1:
5445   //  ...
5446   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5447   //  ...
5448   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5449   // break;
5450   // case 2:
5451   //  ...
5452   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5453   //  ...
5454   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5455   // break;
5456   // default:;
5457   // }
5458   //
5459   // if SimpleReduction is true, only the next code is generated:
5460   //  ...
5461   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5462   //  ...
5463 
5464   ASTContext &C = CGM.getContext();
5465 
5466   if (SimpleReduction) {
5467     CodeGenFunction::RunCleanupsScope Scope(CGF);
5468     const auto *IPriv = Privates.begin();
5469     const auto *ILHS = LHSExprs.begin();
5470     const auto *IRHS = RHSExprs.begin();
5471     for (const Expr *E : ReductionOps) {
5472       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5473                                   cast<DeclRefExpr>(*IRHS));
5474       ++IPriv;
5475       ++ILHS;
5476       ++IRHS;
5477     }
5478     return;
5479   }
5480 
5481   // 1. Build a list of reduction variables.
5482   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5483   auto Size = RHSExprs.size();
5484   for (const Expr *E : Privates) {
5485     if (E->getType()->isVariablyModifiedType())
5486       // Reserve place for array size.
5487       ++Size;
5488   }
5489   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5490   QualType ReductionArrayTy =
5491       C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
5492                              /*IndexTypeQuals=*/0);
5493   Address ReductionList =
5494       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5495   const auto *IPriv = Privates.begin();
5496   unsigned Idx = 0;
5497   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5498     Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5499     CGF.Builder.CreateStore(
5500         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5501             CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5502         Elem);
5503     if ((*IPriv)->getType()->isVariablyModifiedType()) {
5504       // Store array size.
5505       ++Idx;
5506       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5507       llvm::Value *Size = CGF.Builder.CreateIntCast(
5508           CGF.getVLASize(
5509                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5510               .NumElts,
5511           CGF.SizeTy, /*isSigned=*/false);
5512       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5513                               Elem);
5514     }
5515   }
5516 
5517   // 2. Emit reduce_func().
5518   llvm::Function *ReductionFn =
5519       emitReductionFunction(Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5520                             Privates, LHSExprs, RHSExprs, ReductionOps);
5521 
5522   // 3. Create static kmp_critical_name lock = { 0 };
5523   std::string Name = getName({"reduction"});
5524   llvm::Value *Lock = getCriticalRegionLock(Name);
5525 
5526   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5527   // RedList, reduce_func, &<lock>);
5528   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5529   llvm::Value *ThreadId = getThreadID(CGF, Loc);
5530   llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5531   llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5532       ReductionList.getPointer(), CGF.VoidPtrTy);
5533   llvm::Value *Args[] = {
5534       IdentTLoc,                             // ident_t *<loc>
5535       ThreadId,                              // i32 <gtid>
5536       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5537       ReductionArrayTySize,                  // size_type sizeof(RedList)
5538       RL,                                    // void *RedList
5539       ReductionFn, // void (*) (void *, void *) <reduce_func>
5540       Lock         // kmp_critical_name *&<lock>
5541   };
5542   llvm::Value *Res = CGF.EmitRuntimeCall(
5543       OMPBuilder.getOrCreateRuntimeFunction(
5544           CGM.getModule(),
5545           WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5546       Args);
5547 
5548   // 5. Build switch(res)
5549   llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5550   llvm::SwitchInst *SwInst =
5551       CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5552 
5553   // 6. Build case 1:
5554   //  ...
5555   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5556   //  ...
5557   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5558   // break;
5559   llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5560   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5561   CGF.EmitBlock(Case1BB);
5562 
5563   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5564   llvm::Value *EndArgs[] = {
5565       IdentTLoc, // ident_t *<loc>
5566       ThreadId,  // i32 <gtid>
5567       Lock       // kmp_critical_name *&<lock>
5568   };
5569   auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5570                        CodeGenFunction &CGF, PrePostActionTy &Action) {
5571     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5572     const auto *IPriv = Privates.begin();
5573     const auto *ILHS = LHSExprs.begin();
5574     const auto *IRHS = RHSExprs.begin();
5575     for (const Expr *E : ReductionOps) {
5576       RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5577                                      cast<DeclRefExpr>(*IRHS));
5578       ++IPriv;
5579       ++ILHS;
5580       ++IRHS;
5581     }
5582   };
5583   RegionCodeGenTy RCG(CodeGen);
5584   CommonActionTy Action(
5585       nullptr, llvm::None,
5586       OMPBuilder.getOrCreateRuntimeFunction(
5587           CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5588                                       : OMPRTL___kmpc_end_reduce),
5589       EndArgs);
5590   RCG.setAction(Action);
5591   RCG(CGF);
5592 
5593   CGF.EmitBranch(DefaultBB);
5594 
5595   // 7. Build case 2:
5596   //  ...
5597   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5598   //  ...
5599   // break;
5600   llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5601   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5602   CGF.EmitBlock(Case2BB);
5603 
5604   auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5605                              CodeGenFunction &CGF, PrePostActionTy &Action) {
5606     const auto *ILHS = LHSExprs.begin();
5607     const auto *IRHS = RHSExprs.begin();
5608     const auto *IPriv = Privates.begin();
5609     for (const Expr *E : ReductionOps) {
5610       const Expr *XExpr = nullptr;
5611       const Expr *EExpr = nullptr;
5612       const Expr *UpExpr = nullptr;
5613       BinaryOperatorKind BO = BO_Comma;
5614       if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5615         if (BO->getOpcode() == BO_Assign) {
5616           XExpr = BO->getLHS();
5617           UpExpr = BO->getRHS();
5618         }
5619       }
5620       // Try to emit update expression as a simple atomic.
5621       const Expr *RHSExpr = UpExpr;
5622       if (RHSExpr) {
5623         // Analyze RHS part of the whole expression.
5624         if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5625                 RHSExpr->IgnoreParenImpCasts())) {
5626           // If this is a conditional operator, analyze its condition for
5627           // min/max reduction operator.
5628           RHSExpr = ACO->getCond();
5629         }
5630         if (const auto *BORHS =
5631                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5632           EExpr = BORHS->getRHS();
5633           BO = BORHS->getOpcode();
5634         }
5635       }
5636       if (XExpr) {
5637         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5638         auto &&AtomicRedGen = [BO, VD,
5639                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
5640                                     const Expr *EExpr, const Expr *UpExpr) {
5641           LValue X = CGF.EmitLValue(XExpr);
5642           RValue E;
5643           if (EExpr)
5644             E = CGF.EmitAnyExpr(EExpr);
5645           CGF.EmitOMPAtomicSimpleUpdateExpr(
5646               X, E, BO, /*IsXLHSInRHSPart=*/true,
5647               llvm::AtomicOrdering::Monotonic, Loc,
5648               [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5649                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5650                 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5651                 CGF.emitOMPSimpleStore(
5652                     CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5653                     VD->getType().getNonReferenceType(), Loc);
5654                 PrivateScope.addPrivate(VD, LHSTemp);
5655                 (void)PrivateScope.Privatize();
5656                 return CGF.EmitAnyExpr(UpExpr);
5657               });
5658         };
5659         if ((*IPriv)->getType()->isArrayType()) {
5660           // Emit atomic reduction for array section.
5661           const auto *RHSVar =
5662               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5663           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5664                                     AtomicRedGen, XExpr, EExpr, UpExpr);
5665         } else {
5666           // Emit atomic reduction for array subscript or single variable.
5667           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5668         }
5669       } else {
5670         // Emit as a critical region.
5671         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5672                                            const Expr *, const Expr *) {
5673           CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5674           std::string Name = RT.getName({"atomic_reduction"});
5675           RT.emitCriticalRegion(
5676               CGF, Name,
5677               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5678                 Action.Enter(CGF);
5679                 emitReductionCombiner(CGF, E);
5680               },
5681               Loc);
5682         };
5683         if ((*IPriv)->getType()->isArrayType()) {
5684           const auto *LHSVar =
5685               cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5686           const auto *RHSVar =
5687               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5688           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5689                                     CritRedGen);
5690         } else {
5691           CritRedGen(CGF, nullptr, nullptr, nullptr);
5692         }
5693       }
5694       ++ILHS;
5695       ++IRHS;
5696       ++IPriv;
5697     }
5698   };
5699   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5700   if (!WithNowait) {
5701     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5702     llvm::Value *EndArgs[] = {
5703         IdentTLoc, // ident_t *<loc>
5704         ThreadId,  // i32 <gtid>
5705         Lock       // kmp_critical_name *&<lock>
5706     };
5707     CommonActionTy Action(nullptr, llvm::None,
5708                           OMPBuilder.getOrCreateRuntimeFunction(
5709                               CGM.getModule(), OMPRTL___kmpc_end_reduce),
5710                           EndArgs);
5711     AtomicRCG.setAction(Action);
5712     AtomicRCG(CGF);
5713   } else {
5714     AtomicRCG(CGF);
5715   }
5716 
5717   CGF.EmitBranch(DefaultBB);
5718   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5719 }
5720 
5721 /// Generates unique name for artificial threadprivate variables.
5722 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5723 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5724                                       const Expr *Ref) {
5725   SmallString<256> Buffer;
5726   llvm::raw_svector_ostream Out(Buffer);
5727   const clang::DeclRefExpr *DE;
5728   const VarDecl *D = ::getBaseDecl(Ref, DE);
5729   if (!D)
5730     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5731   D = D->getCanonicalDecl();
5732   std::string Name = CGM.getOpenMPRuntime().getName(
5733       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5734   Out << Prefix << Name << "_"
5735       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5736   return std::string(Out.str());
5737 }
5738 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
/// \param RCG Reduction codegen helper describing all reduction items.
/// \param N Index of the reduction item this initializer is emitted for.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  QualType PrivateType = RCG.getPrivateType(N);
  // Load the address of the private copy from the first (void*) argument.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.Builder.CreateElementBitCast(
          CGF.GetAddrOfLocalVar(&Param),
          CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
      C.getPointerType(PrivateType)->castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5805 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
/// \param ReductionOp The combiner expression; it references \p LHS and
///        \p RHS, which are remapped below to the function arguments.
/// \param PrivateRef Private copy expression, used to pick the array vs.
///        scalar combiner path.
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(
      LHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.Builder.CreateElementBitCast(
              CGF.GetAddrOfLocalVar(&ParamInOut),
              CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
          C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
  PrivateScope.addPrivate(
      RHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.Builder.CreateElementBitCast(
            CGF.GetAddrOfLocalVar(&ParamIn),
            CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
          C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
5883 
5884 /// Emits reduction finalizer function:
5885 /// \code
5886 /// void @.red_fini(void* %arg) {
5887 /// %0 = bitcast void* %arg to <type>*
5888 /// <destroy>(<type>* %0)
5889 /// ret void
5890 /// }
5891 /// \endcode
5892 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5893                                            SourceLocation Loc,
5894                                            ReductionCodeGen &RCG, unsigned N) {
5895   if (!RCG.needCleanups(N))
5896     return nullptr;
5897   ASTContext &C = CGM.getContext();
5898   FunctionArgList Args;
5899   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5900                           ImplicitParamDecl::Other);
5901   Args.emplace_back(&Param);
5902   const auto &FnInfo =
5903       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5904   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5905   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5906   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5907                                     Name, &CGM.getModule());
5908   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5909   Fn->setDoesNotRecurse();
5910   CodeGenFunction CGF(CGM);
5911   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5912   Address PrivateAddr = CGF.EmitLoadOfPointer(
5913       CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
5914   llvm::Value *Size = nullptr;
5915   // If the size of the reduction item is non-constant, load it from global
5916   // threadprivate variable.
5917   if (RCG.getSizes(N).second) {
5918     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5919         CGF, CGM.getContext().getSizeType(),
5920         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5921     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5922                                 CGM.getContext().getSizeType(), Loc);
5923   }
5924   RCG.emitAggregateType(CGF, N, Size);
5925   // Emit the finalizer body:
5926   // <destroy>(<type>* %0)
5927   RCG.emitCleanups(CGF, N, PrivateAddr);
5928   CGF.FinishFunction(Loc);
5929   return Fn;
5930 }
5931 
5932 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5933     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5934     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5935   if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5936     return nullptr;
5937 
5938   // Build typedef struct:
5939   // kmp_taskred_input {
5940   //   void *reduce_shar; // shared reduction item
5941   //   void *reduce_orig; // original reduction item used for initialization
5942   //   size_t reduce_size; // size of data item
5943   //   void *reduce_init; // data initialization routine
5944   //   void *reduce_fini; // data finalization routine
5945   //   void *reduce_comb; // data combiner routine
5946   //   kmp_task_red_flags_t flags; // flags for additional info from compiler
5947   // } kmp_taskred_input_t;
5948   ASTContext &C = CGM.getContext();
5949   RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5950   RD->startDefinition();
5951   const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5952   const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5953   const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5954   const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5955   const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5956   const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5957   const FieldDecl *FlagsFD = addFieldToRecordDecl(
5958       C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5959   RD->completeDefinition();
5960   QualType RDType = C.getRecordType(RD);
5961   unsigned Size = Data.ReductionVars.size();
5962   llvm::APInt ArraySize(/*numBits=*/64, Size);
5963   QualType ArrayRDType = C.getConstantArrayType(
5964       RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
5965   // kmp_task_red_input_t .rd_input.[Size];
5966   Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5967   ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5968                        Data.ReductionCopies, Data.ReductionOps);
5969   for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5970     // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5971     llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5972                            llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5973     llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5974         TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5975         /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5976         ".rd_input.gep.");
5977     LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
5978     // ElemLVal.reduce_shar = &Shareds[Cnt];
5979     LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5980     RCG.emitSharedOrigLValue(CGF, Cnt);
5981     llvm::Value *CastedShared =
5982         CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
5983     CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
5984     // ElemLVal.reduce_orig = &Origs[Cnt];
5985     LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5986     llvm::Value *CastedOrig =
5987         CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
5988     CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
5989     RCG.emitAggregateType(CGF, Cnt);
5990     llvm::Value *SizeValInChars;
5991     llvm::Value *SizeVal;
5992     std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5993     // We use delayed creation/initialization for VLAs and array sections. It is
5994     // required because runtime does not provide the way to pass the sizes of
5995     // VLAs/array sections to initializer/combiner/finalizer functions. Instead
5996     // threadprivate global variables are used to store these values and use
5997     // them in the functions.
5998     bool DelayedCreation = !!SizeVal;
5999     SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6000                                                /*isSigned=*/false);
6001     LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6002     CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6003     // ElemLVal.reduce_init = init;
6004     LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6005     llvm::Value *InitAddr =
6006         CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6007     CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6008     // ElemLVal.reduce_fini = fini;
6009     LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6010     llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6011     llvm::Value *FiniAddr = Fini
6012                                 ? CGF.EmitCastToVoidPtr(Fini)
6013                                 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6014     CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6015     // ElemLVal.reduce_comb = comb;
6016     LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6017     llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6018         CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6019         RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6020     CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6021     // ElemLVal.flags = 0;
6022     LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6023     if (DelayedCreation) {
6024       CGF.EmitStoreOfScalar(
6025           llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6026           FlagsLVal);
6027     } else
6028       CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
6029                                  FlagsLVal.getType());
6030   }
6031   if (Data.IsReductionWithTaskMod) {
6032     // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6033     // is_ws, int num, void *data);
6034     llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6035     llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6036                                                   CGM.IntTy, /*isSigned=*/true);
6037     llvm::Value *Args[] = {
6038         IdentTLoc, GTid,
6039         llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
6040                                /*isSigned=*/true),
6041         llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6042         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6043             TaskRedInput.getPointer(), CGM.VoidPtrTy)};
6044     return CGF.EmitRuntimeCall(
6045         OMPBuilder.getOrCreateRuntimeFunction(
6046             CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
6047         Args);
6048   }
6049   // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
6050   llvm::Value *Args[] = {
6051       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6052                                 /*isSigned=*/true),
6053       llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6054       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6055                                                       CGM.VoidPtrTy)};
6056   return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6057                                  CGM.getModule(), OMPRTL___kmpc_taskred_init),
6058                              Args);
6059 }
6060 
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
  // int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  // is_ws is 1 for a worksharing-based reduction, 0 otherwise.
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}
6078 
6079 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6080                                               SourceLocation Loc,
6081                                               ReductionCodeGen &RCG,
6082                                               unsigned N) {
6083   auto Sizes = RCG.getSizes(N);
6084   // Emit threadprivate global variable if the type is non-constant
6085   // (Sizes.second = nullptr).
6086   if (Sizes.second) {
6087     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6088                                                      /*isSigned=*/false);
6089     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6090         CGF, CGM.getContext().getSizeType(),
6091         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6092     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6093   }
6094 }
6095 
6096 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6097                                               SourceLocation Loc,
6098                                               llvm::Value *ReductionsPtr,
6099                                               LValue SharedLVal) {
6100   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6101   // *d);
6102   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6103                                                    CGM.IntTy,
6104                                                    /*isSigned=*/true),
6105                          ReductionsPtr,
6106                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6107                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6108   return Address(
6109       CGF.EmitRuntimeCall(
6110           OMPBuilder.getOrCreateRuntimeFunction(
6111               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6112           Args),
6113       CGF.Int8Ty, SharedLVal.getAlignment());
6114 }
6115 
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    // Materialize the depend clause items (if any) into an array plus its
    // element count.
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    llvm::Value *DepWaitTaskArgs[6];
    if (!Data.Dependences.empty()) {
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.getPointer();
      // No noalias dependences are passed: ndeps_noalias = 0 and a null
      // noalias_dep_list.
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);

      // Run any cleanups emitted for the dependence array before leaving this
      // scope.
      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
      // is specified.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);

    } else {

      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  // An untied-task switch point may follow the taskwait inside an OpenMP
  // region.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
6166 
6167 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6168                                            OpenMPDirectiveKind InnerKind,
6169                                            const RegionCodeGenTy &CodeGen,
6170                                            bool HasCancel) {
6171   if (!CGF.HaveInsertPoint())
6172     return;
6173   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6174                                  InnerKind != OMPD_critical &&
6175                                      InnerKind != OMPD_master &&
6176                                      InnerKind != OMPD_masked);
6177   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6178 }
6179 
namespace {
/// Cancellation kind values passed as 'cncl_kind' to the __kmpc_cancel and
/// __kmpc_cancellationpoint runtime entry points. The numeric values are part
/// of the runtime ABI and must stay in sync with the OpenMP runtime's cancel
/// kinds (see kmp.h in the runtime -- confirm before changing).
enum RTCancelKind {
  CancelNoreq = 0,     // No cancellation requested.
  CancelParallel = 1,  // Cancel a parallel region.
  CancelLoop = 2,      // Cancel a worksharing loop.
  CancelSections = 3,  // Cancel a sections construct.
  CancelTaskgroup = 4  // Cancel a taskgroup.
};
} // anonymous namespace
6189 
6190 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6191   RTCancelKind CancelKind = CancelNoreq;
6192   if (CancelRegion == OMPD_parallel)
6193     CancelKind = CancelParallel;
6194   else if (CancelRegion == OMPD_for)
6195     CancelKind = CancelLoop;
6196   else if (CancelRegion == OMPD_sections)
6197     CancelKind = CancelSections;
6198   else {
6199     assert(CancelRegion == OMPD_taskgroup);
6200     CancelKind = CancelTaskgroup;
6201   }
6202   return CancelKind;
6203 }
6204 
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // A non-zero result means cancellation was activated; emit:
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct; branch through cleanups so destructors and
      // finalization run on the way out.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6244 
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // The cancel itself is emitted by this lambda so it can be guarded by the
    // 'if' clause below. Note: the runtime is fetched through CGF.CGM inside
    // the lambda because it may run with a different CodeGenFunction.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // A non-zero result means cancellation was activated; emit:
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct; branch through cleanups so destructors and
      // finalization run on the way out.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // 'if' clause present: only cancel when the condition is true; the
      // else-branch generator is a no-op.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6290 
namespace {
/// Cleanup action for uses_allocators support: on region entry every
/// (allocator, traits) pair is initialized via emitUsesAllocatorsInit
/// (__kmpc_init_allocator) and on region exit each allocator is destroyed via
/// emitUsesAllocatorsFini (__kmpc_destroy_allocator).
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  /// (allocator expr, allocator-traits expr) pairs; only the allocator is
  /// needed for the fini call.
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  /// Initialize all allocators before the region body is emitted.
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  /// Destroy all allocators after the region body is emitted.
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace
6318 
6319 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6320     const OMPExecutableDirective &D, StringRef ParentName,
6321     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6322     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6323   assert(!ParentName.empty() && "Invalid target region parent name!");
6324   HasEmittedTargetRegion = true;
6325   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6326   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6327     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6328       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6329       if (!D.AllocatorTraits)
6330         continue;
6331       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6332     }
6333   }
6334   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6335   CodeGen.setAction(UsesAllocatorAction);
6336   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6337                                    IsOffloadEntry, CodeGen);
6338 }
6339 
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  // Emit, for a uses_allocators clause item with traits:
  //   allocator = __kmpc_init_allocator(gtid, /*memspace=*/nullptr,
  //                                     num_traits, traits);
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Number of traits = element count of the traits constant array.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  // Reinterpret the traits array address as void * for the runtime call,
  // preserving the original base/TBAA info.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator: emit the allocator variable's declaration, then store
  // the runtime handle, converted from void * to the allocator's declared
  // type.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6374 
6375 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6376                                              const Expr *Allocator) {
6377   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6378   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6379   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6380   llvm::Value *AllocatorVal =
6381       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6382   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6383                                           CGF.getContext().VoidPtrTy,
6384                                           Allocator->getExprLoc());
6385   (void)CGF.EmitRuntimeCall(
6386       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6387                                             OMPRTL___kmpc_destroy_allocator),
6388       {ThreadId, AllocatorVal});
6389 }
6390 
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  // The host body is skipped only when compiling for the host with
  // -fopenmp-offload-mandatory (no host fallback allowed).
  const bool BuildOutlinedFn = CGM.getLangOpts().OpenMPIsDevice ||
                               !CGM.getLangOpts().OpenMPOffloadMandatory;
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Use a fresh CodeGenFunction so the outlined function is emitted
  // independently of the current function being generated.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  if (BuildOutlinedFn)
    OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    // Note: OutlinedFn is always built on the device path (BuildOutlinedFn is
    // true when OpenMPIsDevice).
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakODRLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // Host: the region ID is just a unique constant byte.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // If we do not allow host fallback we still need a named address to use.
  llvm::Constant *TargetRegionEntryAddr = OutlinedFn;
  if (!BuildOutlinedFn) {
    assert(!CGM.getModule().getGlobalVariable(EntryFnName, true) &&
           "Named kernel already exists?");
    TargetRegionEntryAddr = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::InternalLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), EntryFnName);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, TargetRegionEntryAddr, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);

  // Add NumTeams and ThreadLimit attributes to the outlined GPU function
  int32_t DefaultValTeams = -1;
  getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
  if (DefaultValTeams > 0 && OutlinedFn) {
    OutlinedFn->addFnAttr("omp_target_num_teams",
                          std::to_string(DefaultValTeams));
  }
  int32_t DefaultValThreads = -1;
  getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
  if (DefaultValThreads > 0 && OutlinedFn) {
    OutlinedFn->addFnAttr("omp_target_thread_limit",
                          std::to_string(DefaultValThreads));
  }

  if (BuildOutlinedFn)
    CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
}
6490 
6491 /// Checks if the expression is constant or does not have non-trivial function
6492 /// calls.
6493 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6494   // We can skip constant expressions.
6495   // We can skip expressions with trivial calls or simple expressions.
6496   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6497           !E->hasNonTrivialCall(Ctx)) &&
6498          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6499 }
6500 
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  // Peels containers/compound statements off Body and returns the single
  // remaining meaningful child statement, or nullptr if there is more than
  // one. Trivial expressions, asm/null statements, some OpenMP directives,
  // and declarations without codegen impact are ignored.
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations: a DeclStmt is ignorable when every declaration
      // in it is a non-codegen entity, a global, or an unused variable.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Keep unwrapping: the single child may itself be a container.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6542 
// Determines the number of teams for a target-based directive. Returns the
// num_teams clause expression when one applies (also setting DefaultVal when
// it is an integer constant), or nullptr with DefaultVal set to the
// directive-specific default: 0 = runtime default, 1 = single team,
// -1 = no teams region needs to be emitted.
const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': inspect the single nested directive (if any) to decide
    // how many teams it implies.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(CGF.getContext()))
              DefaultVal = Constant->getExtValue();
          return NumTeams;
        }
        // Nested teams without num_teams: runtime default.
        DefaultVal = 0;
        return nullptr;
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
        DefaultVal = 1;
        return nullptr;
      }
      DefaultVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region
    DefaultVal = -1;
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams: the num_teams clause (if any) sits on D itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return NumTeams;
    }
    DefaultVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // target+parallel/simd combinations without teams: single team.
    DefaultVal = 1;
    return nullptr;
  // All remaining directive kinds are not target-based; the assert above
  // rules them out and they fall through to llvm_unreachable.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6671 
// Emits the number-of-teams value for a target directive: the evaluated
// num_teams clause expression cast to i32 when present, the directive's
// default constant otherwise, or nullptr when no teams region is needed
// (DefaultNT stays -1 and there is no clause).
llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  int32_t DefaultNT = -1;
  const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      // The clause expression comes from a nested teams directive, so it must
      // be emitted in the context of the inner captured statement.
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                  /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                             /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      // Combined directives: emit in the current context, with cleanups
      // scoped to this expression.
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                  /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                             /*isSigned=*/true);
    }
    default:
      break;
    }
  } else if (DefaultNT == -1) {
    // No clause and no default: no teams region should be emitted.
    return nullptr;
  }

  return Bld.getInt32(DefaultNT);
}
6713 
6714 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6715                                   llvm::Value *DefaultThreadLimitVal) {
6716   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6717       CGF.getContext(), CS->getCapturedStmt());
6718   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6719     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6720       llvm::Value *NumThreads = nullptr;
6721       llvm::Value *CondVal = nullptr;
6722       // Handle if clause. If if clause present, the number of threads is
6723       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6724       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6725         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6726         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6727         const OMPIfClause *IfClause = nullptr;
6728         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6729           if (C->getNameModifier() == OMPD_unknown ||
6730               C->getNameModifier() == OMPD_parallel) {
6731             IfClause = C;
6732             break;
6733           }
6734         }
6735         if (IfClause) {
6736           const Expr *Cond = IfClause->getCondition();
6737           bool Result;
6738           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6739             if (!Result)
6740               return CGF.Builder.getInt32(1);
6741           } else {
6742             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6743             if (const auto *PreInit =
6744                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6745               for (const auto *I : PreInit->decls()) {
6746                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6747                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6748                 } else {
6749                   CodeGenFunction::AutoVarEmission Emission =
6750                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6751                   CGF.EmitAutoVarCleanups(Emission);
6752                 }
6753               }
6754             }
6755             CondVal = CGF.EvaluateExprAsBool(Cond);
6756           }
6757         }
6758       }
6759       // Check the value of num_threads clause iff if clause was not specified
6760       // or is not evaluated to false.
6761       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6762         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6763         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6764         const auto *NumThreadsClause =
6765             Dir->getSingleClause<OMPNumThreadsClause>();
6766         CodeGenFunction::LexicalScope Scope(
6767             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6768         if (const auto *PreInit =
6769                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6770           for (const auto *I : PreInit->decls()) {
6771             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6772               CGF.EmitVarDecl(cast<VarDecl>(*I));
6773             } else {
6774               CodeGenFunction::AutoVarEmission Emission =
6775                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6776               CGF.EmitAutoVarCleanups(Emission);
6777             }
6778           }
6779         }
6780         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6781         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6782                                                /*isSigned=*/false);
6783         if (DefaultThreadLimitVal)
6784           NumThreads = CGF.Builder.CreateSelect(
6785               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6786               DefaultThreadLimitVal, NumThreads);
6787       } else {
6788         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6789                                            : CGF.Builder.getInt32(0);
6790       }
6791       // Process condition of the if clause.
6792       if (CondVal) {
6793         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6794                                               CGF.Builder.getInt32(1));
6795       }
6796       return NumThreads;
6797     }
6798     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6799       return CGF.Builder.getInt32(1);
6800     return DefaultThreadLimitVal;
6801   }
6802   return DefaultThreadLimitVal ? DefaultThreadLimitVal
6803                                : CGF.Builder.getInt32(0);
6804 }
6805 
6806 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6807     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6808     int32_t &DefaultVal) {
6809   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6810   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6811          "Expected target-based executable directive.");
6812 
6813   switch (DirectiveKind) {
6814   case OMPD_target:
6815     // Teams have no clause thread_limit
6816     return nullptr;
6817   case OMPD_target_teams:
6818   case OMPD_target_teams_distribute:
6819     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6820       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6821       const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
6822       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6823         if (auto Constant =
6824                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6825           DefaultVal = Constant->getExtValue();
6826       return ThreadLimit;
6827     }
6828     return nullptr;
6829   case OMPD_target_parallel:
6830   case OMPD_target_parallel_for:
6831   case OMPD_target_parallel_for_simd:
6832   case OMPD_target_teams_distribute_parallel_for:
6833   case OMPD_target_teams_distribute_parallel_for_simd: {
6834     Expr *ThreadLimit = nullptr;
6835     Expr *NumThreads = nullptr;
6836     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6837       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6838       ThreadLimit = ThreadLimitClause->getThreadLimit();
6839       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6840         if (auto Constant =
6841                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6842           DefaultVal = Constant->getExtValue();
6843     }
6844     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6845       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6846       NumThreads = NumThreadsClause->getNumThreads();
6847       if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
6848         if (auto Constant =
6849                 NumThreads->getIntegerConstantExpr(CGF.getContext())) {
6850           if (Constant->getExtValue() < DefaultVal) {
6851             DefaultVal = Constant->getExtValue();
6852             ThreadLimit = NumThreads;
6853           }
6854         }
6855       }
6856     }
6857     return ThreadLimit;
6858   }
6859   case OMPD_target_teams_distribute_simd:
6860   case OMPD_target_simd:
6861     DefaultVal = 1;
6862     return nullptr;
6863   case OMPD_parallel:
6864   case OMPD_for:
6865   case OMPD_parallel_for:
6866   case OMPD_parallel_master:
6867   case OMPD_parallel_sections:
6868   case OMPD_for_simd:
6869   case OMPD_parallel_for_simd:
6870   case OMPD_cancel:
6871   case OMPD_cancellation_point:
6872   case OMPD_ordered:
6873   case OMPD_threadprivate:
6874   case OMPD_allocate:
6875   case OMPD_task:
6876   case OMPD_simd:
6877   case OMPD_tile:
6878   case OMPD_unroll:
6879   case OMPD_sections:
6880   case OMPD_section:
6881   case OMPD_single:
6882   case OMPD_master:
6883   case OMPD_critical:
6884   case OMPD_taskyield:
6885   case OMPD_barrier:
6886   case OMPD_taskwait:
6887   case OMPD_taskgroup:
6888   case OMPD_atomic:
6889   case OMPD_flush:
6890   case OMPD_depobj:
6891   case OMPD_scan:
6892   case OMPD_teams:
6893   case OMPD_target_data:
6894   case OMPD_target_exit_data:
6895   case OMPD_target_enter_data:
6896   case OMPD_distribute:
6897   case OMPD_distribute_simd:
6898   case OMPD_distribute_parallel_for:
6899   case OMPD_distribute_parallel_for_simd:
6900   case OMPD_teams_distribute:
6901   case OMPD_teams_distribute_simd:
6902   case OMPD_teams_distribute_parallel_for:
6903   case OMPD_teams_distribute_parallel_for_simd:
6904   case OMPD_target_update:
6905   case OMPD_declare_simd:
6906   case OMPD_declare_variant:
6907   case OMPD_begin_declare_variant:
6908   case OMPD_end_declare_variant:
6909   case OMPD_declare_target:
6910   case OMPD_end_declare_target:
6911   case OMPD_declare_reduction:
6912   case OMPD_declare_mapper:
6913   case OMPD_taskloop:
6914   case OMPD_taskloop_simd:
6915   case OMPD_master_taskloop:
6916   case OMPD_master_taskloop_simd:
6917   case OMPD_parallel_master_taskloop:
6918   case OMPD_parallel_master_taskloop_simd:
6919   case OMPD_requires:
6920   case OMPD_unknown:
6921     break;
6922   default:
6923     break;
6924   }
6925   llvm_unreachable("Unsupported directive kind.");
6926 }
6927 
/// Emit an i32 value giving the number of threads for the given target
/// directive on the host side, combining thread_limit, num_threads and 'if'
/// clauses with any nested parallel/simd regions. A result of 0 means
/// "let the runtime choose".
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': look inside the captured region for a nested
    // parallel/teams/distribute construct that determines the thread count.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // A thread_limit clause on the nested directive caps the count; its
      // pre-init declarations must be emitted before evaluating it.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              // Captured without initialization: allocate + cleanups only.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // For 'teams' without 'distribute', descend one more level to find the
      // region that actually determines the thread count.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // A simd region runs on a single thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    // thread_limit on the combined directive itself, then consult nested
    // regions for a tighter value.
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      // Select the 'if' clause applying to the parallel region (unmodified
      // or with the 'parallel' name modifier).
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false: the region runs with one thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Effective value is min(num_threads, thread_limit), compared unsigned.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // simd regions execute with a single thread.
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7146 
7147 namespace {
7148 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7149 
7150 // Utility to handle information from clauses associated with a given
7151 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7152 // It provides a convenient interface to obtain the information and generate
7153 // code for that information.
7154 class MappableExprsHandler {
7155 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. These must stay in sync with the flag values used by the
  /// offloading runtime.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    // Increment and decrement a separate reference counter so that the data
    // cannot be unmapped within the associated region.  Thus, this flag is
    // intended to be used on 'target' and 'target data' directives because they
    // are inherently structured.  It is not intended to be used on 'target
    // enter data' and 'target exit data' directives because they are inherently
    // dynamic.
    // This is an OpenMP extension for the sake of OpenACC support.
    OMP_MAP_OMPX_HOLD = 0x2000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7211 
7212   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7213   static unsigned getFlagMemberOffset() {
7214     unsigned Offset = 0;
7215     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7216          Remain = Remain >> 1)
7217       Offset++;
7218     return Offset;
7219   }
7220 
  /// Class that holds debugging information for a data mapping to be passed to
  /// the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    /// \p MapExpr may be omitted when the mapping has no associated clause
    /// expression (e.g. implicit maps).
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };
7237 
  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    /// Dereference yields the wrapped base pointer value.
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };
7254 
7255   using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
7256   using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7257   using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7258   using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7259   using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
7260   using MapDimArrayTy = SmallVector<uint64_t, 4>;
7261   using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7262 
7263   /// This structure contains combined information generated for mappable
7264   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7265   /// mappers, and non-contiguous information.
7266   struct MapCombinedInfoTy {
7267     struct StructNonContiguousInfo {
7268       bool IsNonContiguous = false;
7269       MapDimArrayTy Dims;
7270       MapNonContiguousArrayTy Offsets;
7271       MapNonContiguousArrayTy Counts;
7272       MapNonContiguousArrayTy Strides;
7273     };
7274     MapExprsArrayTy Exprs;
7275     MapBaseValuesArrayTy BasePointers;
7276     MapValuesArrayTy Pointers;
7277     MapValuesArrayTy Sizes;
7278     MapFlagsArrayTy Types;
7279     MapMappersArrayTy Mappers;
7280     StructNonContiguousInfo NonContigInfo;
7281 
7282     /// Append arrays in \a CurInfo.
7283     void append(MapCombinedInfoTy &CurInfo) {
7284       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7285       BasePointers.append(CurInfo.BasePointers.begin(),
7286                           CurInfo.BasePointers.end());
7287       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7288       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7289       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7290       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7291       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7292                                  CurInfo.NonContigInfo.Dims.end());
7293       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7294                                     CurInfo.NonContigInfo.Offsets.end());
7295       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7296                                    CurInfo.NonContigInfo.Counts.end());
7297       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7298                                     CurInfo.NonContigInfo.Strides.end());
7299     }
7300   };
7301 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Map information collected before the struct's range was known.
    MapCombinedInfoTy PreliminaryMapData;
    /// Lowest mapped element: field index and its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest mapped element: field index and its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    /// True when one of the mapped elements is an array section.
    bool IsArraySection = false;
    /// True when the whole record was mapped rather than individual fields.
    bool HasCompleteRecord = false;
  };
7317 
7318 private:
  /// Bundles all information extracted from a single map-like clause
  /// component list: the expression components, map type/modifiers, and how
  /// the resulting device pointer (if any) has to be returned.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    /// Whether the runtime must return the device pointer for this entry.
    bool ReturnDevicePointer = false;
    /// Whether this mapping was generated implicitly.
    bool IsImplicit = false;
    /// User-defined mapper associated with the clause, if any.
    const ValueDecl *Mapper = nullptr;
    /// Original variable reference expression, if any.
    const Expr *VarRef = nullptr;
    /// Whether the entry comes from use_device_addr (vs use_device_ptr).
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
7345 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// The expression naming the struct member.
    const Expr *IE = nullptr;
    /// The declaration the clause refers to.
    const ValueDecl *VD = nullptr;
    /// True for use_device_addr, false for use_device_ptr.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7358 
7359   /// The target directive from where the mappable clauses were extracted. It
7360   /// is either a executable directive or a user-defined mapper directive.
7361   llvm::PointerUnion<const OMPExecutableDirective *,
7362                      const OMPDeclareMapperDecl *>
7363       CurDir;
7364 
7365   /// Function the directive is being generated for.
7366   CodeGenFunction &CGF;
7367 
7368   /// Set of all first private variables in the current directive.
7369   /// bool data is set to true if the variable is implicitly marked as
7370   /// firstprivate, false otherwise.
7371   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7372 
7373   /// Map between device pointer declarations and their expression components.
7374   /// The key value for declarations in 'this' is null.
7375   llvm::DenseMap<
7376       const ValueDecl *,
7377       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7378       DevPointersMap;
7379 
7380   /// Map between lambda declarations and their map type.
7381   llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7382 
7383   llvm::Value *getExprTypeSize(const Expr *E) const {
7384     QualType ExprTy = E->getType().getCanonicalType();
7385 
7386     // Calculate the size for array shaping expression.
7387     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7388       llvm::Value *Size =
7389           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7390       for (const Expr *SE : OAE->getDimensions()) {
7391         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7392         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7393                                       CGF.getContext().getSizeType(),
7394                                       SE->getExprLoc());
7395         Size = CGF.Builder.CreateNUWMul(Size, Sz);
7396       }
7397       return Size;
7398     }
7399 
7400     // Reference types are ignored for mapping purposes.
7401     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7402       ExprTy = RefTy->getPointeeType().getCanonicalType();
7403 
7404     // Given that an array section is considered a built-in type, we need to
7405     // do the calculation based on the length of the section instead of relying
7406     // on CGF.getTypeSize(E->getType()).
7407     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7408       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7409                             OAE->getBase()->IgnoreParenImpCasts())
7410                             .getCanonicalType();
7411 
7412       // If there is no length associated with the expression and lower bound is
7413       // not specified too, that means we are using the whole length of the
7414       // base.
7415       if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7416           !OAE->getLowerBound())
7417         return CGF.getTypeSize(BaseTy);
7418 
7419       llvm::Value *ElemSize;
7420       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7421         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7422       } else {
7423         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7424         assert(ATy && "Expecting array type if not a pointer type.");
7425         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7426       }
7427 
7428       // If we don't have a length at this point, that is because we have an
7429       // array section with a single element.
7430       if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7431         return ElemSize;
7432 
7433       if (const Expr *LenExpr = OAE->getLength()) {
7434         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7435         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7436                                              CGF.getContext().getSizeType(),
7437                                              LenExpr->getExprLoc());
7438         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7439       }
7440       assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7441              OAE->getLowerBound() && "expected array_section[lb:].");
7442       // Size = sizetype - lb * elemtype;
7443       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7444       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7445       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7446                                        CGF.getContext().getSizeType(),
7447                                        OAE->getLowerBound()->getExprLoc());
7448       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7449       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7450       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7451       LengthVal = CGF.Builder.CreateSelect(
7452           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7453       return LengthVal;
7454     }
7455     return CGF.getTypeSize(ExprTy);
7456   }
7457 
7458   /// Return the corresponding bits for a given map clause modifier. Add
7459   /// a flag marking the map as a pointer if requested. Add a flag marking the
7460   /// map as the first one of a series of maps that relate to the same map
7461   /// expression.
7462   OpenMPOffloadMappingFlags getMapTypeBits(
7463       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7464       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7465       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7466     OpenMPOffloadMappingFlags Bits =
7467         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7468     switch (MapType) {
7469     case OMPC_MAP_alloc:
7470     case OMPC_MAP_release:
7471       // alloc and release is the default behavior in the runtime library,  i.e.
7472       // if we don't pass any bits alloc/release that is what the runtime is
7473       // going to do. Therefore, we don't need to signal anything for these two
7474       // type modifiers.
7475       break;
7476     case OMPC_MAP_to:
7477       Bits |= OMP_MAP_TO;
7478       break;
7479     case OMPC_MAP_from:
7480       Bits |= OMP_MAP_FROM;
7481       break;
7482     case OMPC_MAP_tofrom:
7483       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7484       break;
7485     case OMPC_MAP_delete:
7486       Bits |= OMP_MAP_DELETE;
7487       break;
7488     case OMPC_MAP_unknown:
7489       llvm_unreachable("Unexpected map type!");
7490     }
7491     if (AddPtrFlag)
7492       Bits |= OMP_MAP_PTR_AND_OBJ;
7493     if (AddIsTargetParamFlag)
7494       Bits |= OMP_MAP_TARGET_PARAM;
7495     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7496       Bits |= OMP_MAP_ALWAYS;
7497     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7498       Bits |= OMP_MAP_CLOSE;
7499     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7500         llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7501       Bits |= OMP_MAP_PRESENT;
7502     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7503       Bits |= OMP_MAP_OMPX_HOLD;
7504     if (IsNonContiguous)
7505       Bits |= OMP_MAP_NON_CONTIG;
7506     return Bits;
7507   }
7508 
7509   /// Return true if the provided expression is a final array section. A
7510   /// final array section, is one whose length can't be proved to be one.
7511   bool isFinalArraySectionExpression(const Expr *E) const {
7512     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7513 
7514     // It is not an array section and therefore not a unity-size one.
7515     if (!OASE)
7516       return false;
7517 
7518     // An array section with no colon always refer to a single element.
7519     if (OASE->getColonLocFirst().isInvalid())
7520       return false;
7521 
7522     const Expr *Length = OASE->getLength();
7523 
7524     // If we don't have a length we have to check if the array has size 1
7525     // for this dimension. Also, we should always expect a length if the
7526     // base type is pointer.
7527     if (!Length) {
7528       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7529                              OASE->getBase()->IgnoreParenImpCasts())
7530                              .getCanonicalType();
7531       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7532         return ATy->getSize().getSExtValue() != 1;
7533       // If we don't have a constant dimension length, we have to consider
7534       // the current section as having any size, so it is not necessarily
7535       // unitary. If it happen to be unity size, that's user fault.
7536       return true;
7537     }
7538 
7539     // Check if the length evaluates to 1.
7540     Expr::EvalResult Result;
7541     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7542       return true; // Can have more that size 1.
7543 
7544     llvm::APSInt ConstLength = Result.Val.getInt();
7545     return ConstLength.getSExtValue() != 1;
7546   }
7547 
7548   /// Generate the base pointers, section pointers, sizes, map type bits, and
7549   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7550   /// map type, map or motion modifiers, and expression components.
7551   /// \a IsFirstComponent should be set to true if the provided set of
7552   /// components is the first associated with a capture.
7553   void generateInfoForComponentList(
7554       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7555       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7556       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7557       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7558       bool IsFirstComponentList, bool IsImplicit,
7559       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7560       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7561       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7562           OverlappedElements = llvm::None) const {
7563     // The following summarizes what has to be generated for each map and the
7564     // types below. The generated information is expressed in this order:
7565     // base pointer, section pointer, size, flags
7566     // (to add to the ones that come from the map type and modifier).
7567     //
7568     // double d;
7569     // int i[100];
7570     // float *p;
7571     //
7572     // struct S1 {
7573     //   int i;
7574     //   float f[50];
7575     // }
7576     // struct S2 {
7577     //   int i;
7578     //   float f[50];
7579     //   S1 s;
7580     //   double *p;
7581     //   struct S2 *ps;
7582     //   int &ref;
7583     // }
7584     // S2 s;
7585     // S2 *ps;
7586     //
7587     // map(d)
7588     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7589     //
7590     // map(i)
7591     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7592     //
7593     // map(i[1:23])
7594     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7595     //
7596     // map(p)
7597     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7598     //
7599     // map(p[1:24])
7600     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7601     // in unified shared memory mode or for local pointers
7602     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7603     //
7604     // map(s)
7605     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7606     //
7607     // map(s.i)
7608     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7609     //
7610     // map(s.s.f)
7611     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7612     //
7613     // map(s.p)
7614     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7615     //
7616     // map(to: s.p[:22])
7617     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7618     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7619     // &(s.p), &(s.p[0]), 22*sizeof(double),
7620     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7621     // (*) alloc space for struct members, only this is a target parameter
7622     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7623     //      optimizes this entry out, same in the examples below)
7624     // (***) map the pointee (map: to)
7625     //
7626     // map(to: s.ref)
7627     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7628     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7629     // (*) alloc space for struct members, only this is a target parameter
7630     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7631     //      optimizes this entry out, same in the examples below)
7632     // (***) map the pointee (map: to)
7633     //
7634     // map(s.ps)
7635     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7636     //
7637     // map(from: s.ps->s.i)
7638     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7639     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7640     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7641     //
7642     // map(to: s.ps->ps)
7643     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7644     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7645     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7646     //
7647     // map(s.ps->ps->ps)
7648     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7649     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7650     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7651     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7652     //
7653     // map(to: s.ps->ps->s.f[:22])
7654     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7655     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7656     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7657     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7658     //
7659     // map(ps)
7660     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7661     //
7662     // map(ps->i)
7663     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7664     //
7665     // map(ps->s.f)
7666     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7667     //
7668     // map(from: ps->p)
7669     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7670     //
7671     // map(to: ps->p[:22])
7672     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7673     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7674     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7675     //
7676     // map(ps->ps)
7677     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7678     //
7679     // map(from: ps->ps->s.i)
7680     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7681     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7682     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7683     //
7684     // map(from: ps->ps->ps)
7685     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7686     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7687     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7688     //
7689     // map(ps->ps->ps->ps)
7690     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7691     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7692     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7693     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7694     //
7695     // map(to: ps->ps->ps->s.f[:22])
7696     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7697     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7698     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7699     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7700     //
7701     // map(to: s.f[:22]) map(from: s.p[:33])
7702     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7703     //     sizeof(double*) (**), TARGET_PARAM
7704     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7705     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7706     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7707     // (*) allocate contiguous space needed to fit all mapped members even if
7708     //     we allocate space for members not mapped (in this example,
7709     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7710     //     them as well because they fall between &s.f[0] and &s.p)
7711     //
7712     // map(from: s.f[:22]) map(to: ps->p[:33])
7713     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7714     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7715     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7716     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7717     // (*) the struct this entry pertains to is the 2nd element in the list of
7718     //     arguments, hence MEMBER_OF(2)
7719     //
7720     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7721     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7722     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7723     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7724     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7725     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7726     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7727     // (*) the struct this entry pertains to is the 4th element in the list
7728     //     of arguments, hence MEMBER_OF(4)
7729 
7730     // Track if the map information being generated is the first for a capture.
7731     bool IsCaptureFirstInfo = IsFirstComponentList;
7732     // When the variable is on a declare target link or in a to clause with
7733     // unified memory, a reference is needed to hold the host/device address
7734     // of the variable.
7735     bool RequiresReference = false;
7736 
7737     // Scan the components from the base to the complete expression.
7738     auto CI = Components.rbegin();
7739     auto CE = Components.rend();
7740     auto I = CI;
7741 
7742     // Track if the map information being generated is the first for a list of
7743     // components.
7744     bool IsExpressionFirstInfo = true;
7745     bool FirstPointerInComplexData = false;
7746     Address BP = Address::invalid();
7747     const Expr *AssocExpr = I->getAssociatedExpression();
7748     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7749     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7750     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7751 
7752     if (isa<MemberExpr>(AssocExpr)) {
7753       // The base is the 'this' pointer. The content of the pointer is going
7754       // to be the base of the field being mapped.
7755       BP = CGF.LoadCXXThisAddress();
7756     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7757                (OASE &&
7758                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7759       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7760     } else if (OAShE &&
7761                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7762       BP = Address(
7763           CGF.EmitScalarExpr(OAShE->getBase()),
7764           CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7765           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7766     } else {
7767       // The base is the reference to the variable.
7768       // BP = &Var.
7769       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7770       if (const auto *VD =
7771               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7772         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7773                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7774           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7775               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7776                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7777             RequiresReference = true;
7778             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7779           }
7780         }
7781       }
7782 
7783       // If the variable is a pointer and is being dereferenced (i.e. is not
7784       // the last component), the base has to be the pointer itself, not its
7785       // reference. References are ignored for mapping purposes.
7786       QualType Ty =
7787           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7788       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7789         // No need to generate individual map information for the pointer, it
7790         // can be associated with the combined storage if shared memory mode is
7791         // active or the base declaration is not global variable.
7792         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7793         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7794             !VD || VD->hasLocalStorage())
7795           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7796         else
7797           FirstPointerInComplexData = true;
7798         ++I;
7799       }
7800     }
7801 
7802     // Track whether a component of the list should be marked as MEMBER_OF some
7803     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7804     // in a component list should be marked as MEMBER_OF, all subsequent entries
7805     // do not belong to the base struct. E.g.
7806     // struct S2 s;
7807     // s.ps->ps->ps->f[:]
7808     //   (1) (2) (3) (4)
7809     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7810     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7811     // is the pointee of ps(2) which is not member of struct s, so it should not
7812     // be marked as such (it is still PTR_AND_OBJ).
7813     // The variable is initialized to false so that PTR_AND_OBJ entries which
7814     // are not struct members are not considered (e.g. array of pointers to
7815     // data).
7816     bool ShouldBeMemberOf = false;
7817 
7818     // Variable keeping track of whether or not we have encountered a component
7819     // in the component list which is a member expression. Useful when we have a
7820     // pointer or a final array section, in which case it is the previous
7821     // component in the list which tells us whether we have a member expression.
7822     // E.g. X.f[:]
7823     // While processing the final array section "[:]" it is "f" which tells us
7824     // whether we are dealing with a member of a declared struct.
7825     const MemberExpr *EncounteredME = nullptr;
7826 
7827     // Track for the total number of dimension. Start from one for the dummy
7828     // dimension.
7829     uint64_t DimSize = 1;
7830 
7831     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7832     bool IsPrevMemberReference = false;
7833 
7834     for (; I != CE; ++I) {
7835       // If the current component is member of a struct (parent struct) mark it.
7836       if (!EncounteredME) {
7837         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7838         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7839         // as MEMBER_OF the parent struct.
7840         if (EncounteredME) {
7841           ShouldBeMemberOf = true;
7842           // Do not emit as complex pointer if this is actually not array-like
7843           // expression.
7844           if (FirstPointerInComplexData) {
7845             QualType Ty = std::prev(I)
7846                               ->getAssociatedDeclaration()
7847                               ->getType()
7848                               .getNonReferenceType();
7849             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7850             FirstPointerInComplexData = false;
7851           }
7852         }
7853       }
7854 
7855       auto Next = std::next(I);
7856 
7857       // We need to generate the addresses and sizes if this is the last
7858       // component, if the component is a pointer or if it is an array section
7859       // whose length can't be proved to be one. If this is a pointer, it
7860       // becomes the base address for the following components.
7861 
7862       // A final array section, is one whose length can't be proved to be one.
7863       // If the map item is non-contiguous then we don't treat any array section
7864       // as final array section.
7865       bool IsFinalArraySection =
7866           !IsNonContiguous &&
7867           isFinalArraySectionExpression(I->getAssociatedExpression());
7868 
7869       // If we have a declaration for the mapping use that, otherwise use
7870       // the base declaration of the map clause.
7871       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7872                                      ? I->getAssociatedDeclaration()
7873                                      : BaseDecl;
7874       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7875                                                : MapExpr;
7876 
7877       // Get information on whether the element is a pointer. Have to do a
7878       // special treatment for array sections given that they are built-in
7879       // types.
7880       const auto *OASE =
7881           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7882       const auto *OAShE =
7883           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7884       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7885       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7886       bool IsPointer =
7887           OAShE ||
7888           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7889                        .getCanonicalType()
7890                        ->isAnyPointerType()) ||
7891           I->getAssociatedExpression()->getType()->isAnyPointerType();
7892       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7893                                MapDecl &&
7894                                MapDecl->getType()->isLValueReferenceType();
7895       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
7896 
7897       if (OASE)
7898         ++DimSize;
7899 
7900       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7901           IsFinalArraySection) {
7902         // If this is not the last component, we expect the pointer to be
7903         // associated with an array expression or member expression.
7904         assert((Next == CE ||
7905                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7906                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7907                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7908                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7909                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7910                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7911                "Unexpected expression");
7912 
7913         Address LB = Address::invalid();
7914         Address LowestElem = Address::invalid();
7915         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7916                                        const MemberExpr *E) {
7917           const Expr *BaseExpr = E->getBase();
7918           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
7919           // scalar.
7920           LValue BaseLV;
7921           if (E->isArrow()) {
7922             LValueBaseInfo BaseInfo;
7923             TBAAAccessInfo TBAAInfo;
7924             Address Addr =
7925                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7926             QualType PtrTy = BaseExpr->getType()->getPointeeType();
7927             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7928           } else {
7929             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7930           }
7931           return BaseLV;
7932         };
7933         if (OAShE) {
7934           LowestElem = LB =
7935               Address(CGF.EmitScalarExpr(OAShE->getBase()),
7936                       CGF.ConvertTypeForMem(
7937                           OAShE->getBase()->getType()->getPointeeType()),
7938                       CGF.getContext().getTypeAlignInChars(
7939                           OAShE->getBase()->getType()));
7940         } else if (IsMemberReference) {
7941           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7942           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7943           LowestElem = CGF.EmitLValueForFieldInitialization(
7944                               BaseLVal, cast<FieldDecl>(MapDecl))
7945                            .getAddress(CGF);
7946           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7947                    .getAddress(CGF);
7948         } else {
7949           LowestElem = LB =
7950               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7951                   .getAddress(CGF);
7952         }
7953 
7954         // If this component is a pointer inside the base struct then we don't
7955         // need to create any entry for it - it will be combined with the object
7956         // it is pointing to into a single PTR_AND_OBJ entry.
7957         bool IsMemberPointerOrAddr =
7958             EncounteredME &&
7959             (((IsPointer || ForDeviceAddr) &&
7960               I->getAssociatedExpression() == EncounteredME) ||
7961              (IsPrevMemberReference && !IsPointer) ||
7962              (IsMemberReference && Next != CE &&
7963               !Next->getAssociatedExpression()->getType()->isPointerType()));
7964         if (!OverlappedElements.empty() && Next == CE) {
7965           // Handle base element with the info for overlapped elements.
7966           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7967           assert(!IsPointer &&
7968                  "Unexpected base element with the pointer type.");
7969           // Mark the whole struct as the struct that requires allocation on the
7970           // device.
7971           PartialStruct.LowestElem = {0, LowestElem};
7972           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7973               I->getAssociatedExpression()->getType());
7974           Address HB = CGF.Builder.CreateConstGEP(
7975               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7976                   LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
7977               TypeSize.getQuantity() - 1);
7978           PartialStruct.HighestElem = {
7979               std::numeric_limits<decltype(
7980                   PartialStruct.HighestElem.first)>::max(),
7981               HB};
7982           PartialStruct.Base = BP;
7983           PartialStruct.LB = LB;
7984           assert(
7985               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7986               "Overlapped elements must be used only once for the variable.");
7987           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7988           // Emit data for non-overlapped data.
7989           OpenMPOffloadMappingFlags Flags =
7990               OMP_MAP_MEMBER_OF |
7991               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7992                              /*AddPtrFlag=*/false,
7993                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7994           llvm::Value *Size = nullptr;
7995           // Do bitcopy of all non-overlapped structure elements.
7996           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7997                    Component : OverlappedElements) {
7998             Address ComponentLB = Address::invalid();
7999             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8000                  Component) {
8001               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8002                 const auto *FD = dyn_cast<FieldDecl>(VD);
8003                 if (FD && FD->getType()->isLValueReferenceType()) {
8004                   const auto *ME =
8005                       cast<MemberExpr>(MC.getAssociatedExpression());
8006                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8007                   ComponentLB =
8008                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
8009                           .getAddress(CGF);
8010                 } else {
8011                   ComponentLB =
8012                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
8013                           .getAddress(CGF);
8014                 }
8015                 Size = CGF.Builder.CreatePtrDiff(
8016                     CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
8017                     CGF.EmitCastToVoidPtr(LB.getPointer()));
8018                 break;
8019               }
8020             }
8021             assert(Size && "Failed to determine structure size");
8022             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8023             CombinedInfo.BasePointers.push_back(BP.getPointer());
8024             CombinedInfo.Pointers.push_back(LB.getPointer());
8025             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8026                 Size, CGF.Int64Ty, /*isSigned=*/true));
8027             CombinedInfo.Types.push_back(Flags);
8028             CombinedInfo.Mappers.push_back(nullptr);
8029             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8030                                                                       : 1);
8031             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
8032           }
8033           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8034           CombinedInfo.BasePointers.push_back(BP.getPointer());
8035           CombinedInfo.Pointers.push_back(LB.getPointer());
8036           Size = CGF.Builder.CreatePtrDiff(
8037               CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
8038               CGF.EmitCastToVoidPtr(LB.getPointer()));
8039           CombinedInfo.Sizes.push_back(
8040               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8041           CombinedInfo.Types.push_back(Flags);
8042           CombinedInfo.Mappers.push_back(nullptr);
8043           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8044                                                                     : 1);
8045           break;
8046         }
8047         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8048         if (!IsMemberPointerOrAddr ||
8049             (Next == CE && MapType != OMPC_MAP_unknown)) {
8050           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8051           CombinedInfo.BasePointers.push_back(BP.getPointer());
8052           CombinedInfo.Pointers.push_back(LB.getPointer());
8053           CombinedInfo.Sizes.push_back(
8054               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8055           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8056                                                                     : 1);
8057 
8058           // If Mapper is valid, the last component inherits the mapper.
8059           bool HasMapper = Mapper && Next == CE;
8060           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8061 
8062           // We need to add a pointer flag for each map that comes from the
8063           // same expression except for the first one. We also need to signal
8064           // this map is the first one that relates with the current capture
8065           // (there is a set of entries for each capture).
8066           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8067               MapType, MapModifiers, MotionModifiers, IsImplicit,
8068               !IsExpressionFirstInfo || RequiresReference ||
8069                   FirstPointerInComplexData || IsMemberReference,
8070               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8071 
8072           if (!IsExpressionFirstInfo || IsMemberReference) {
8073             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8074             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8075             if (IsPointer || (IsMemberReference && Next != CE))
8076               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
8077                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
8078 
8079             if (ShouldBeMemberOf) {
8080               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8081               // should be later updated with the correct value of MEMBER_OF.
8082               Flags |= OMP_MAP_MEMBER_OF;
8083               // From now on, all subsequent PTR_AND_OBJ entries should not be
8084               // marked as MEMBER_OF.
8085               ShouldBeMemberOf = false;
8086             }
8087           }
8088 
8089           CombinedInfo.Types.push_back(Flags);
8090         }
8091 
8092         // If we have encountered a member expression so far, keep track of the
8093         // mapped member. If the parent is "*this", then the value declaration
8094         // is nullptr.
8095         if (EncounteredME) {
8096           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8097           unsigned FieldIndex = FD->getFieldIndex();
8098 
8099           // Update info about the lowest and highest elements for this struct
8100           if (!PartialStruct.Base.isValid()) {
8101             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8102             if (IsFinalArraySection) {
8103               Address HB =
8104                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
8105                       .getAddress(CGF);
8106               PartialStruct.HighestElem = {FieldIndex, HB};
8107             } else {
8108               PartialStruct.HighestElem = {FieldIndex, LowestElem};
8109             }
8110             PartialStruct.Base = BP;
8111             PartialStruct.LB = BP;
8112           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8113             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8114           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8115             PartialStruct.HighestElem = {FieldIndex, LowestElem};
8116           }
8117         }
8118 
8119         // Need to emit combined struct for array sections.
8120         if (IsFinalArraySection || IsNonContiguous)
8121           PartialStruct.IsArraySection = true;
8122 
8123         // If we have a final array section, we are done with this expression.
8124         if (IsFinalArraySection)
8125           break;
8126 
8127         // The pointer becomes the base for the next element.
8128         if (Next != CE)
8129           BP = IsMemberReference ? LowestElem : LB;
8130 
8131         IsExpressionFirstInfo = false;
8132         IsCaptureFirstInfo = false;
8133         FirstPointerInComplexData = false;
8134         IsPrevMemberReference = IsMemberReference;
8135       } else if (FirstPointerInComplexData) {
8136         QualType Ty = Components.rbegin()
8137                           ->getAssociatedDeclaration()
8138                           ->getType()
8139                           .getNonReferenceType();
8140         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8141         FirstPointerInComplexData = false;
8142       }
8143     }
8144     // If ran into the whole component - allocate the space for the whole
8145     // record.
8146     if (!EncounteredME)
8147       PartialStruct.HasCompleteRecord = true;
8148 
8149     if (!IsNonContiguous)
8150       return;
8151 
8152     const ASTContext &Context = CGF.getContext();
8153 
8154     // For supporting stride in array section, we need to initialize the first
8155     // dimension size as 1, first offset as 0, and first count as 1
8156     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8157     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8158     MapValuesArrayTy CurStrides;
8159     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8160     uint64_t ElementTypeSize;
8161 
8162     // Collect Size information for each dimension and get the element size as
8163     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
8164     // should be [10, 10] and the first stride is 4 btyes.
8165     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8166          Components) {
8167       const Expr *AssocExpr = Component.getAssociatedExpression();
8168       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8169 
8170       if (!OASE)
8171         continue;
8172 
8173       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8174       auto *CAT = Context.getAsConstantArrayType(Ty);
8175       auto *VAT = Context.getAsVariableArrayType(Ty);
8176 
8177       // We need all the dimension size except for the last dimension.
8178       assert((VAT || CAT || &Component == &*Components.begin()) &&
8179              "Should be either ConstantArray or VariableArray if not the "
8180              "first Component");
8181 
8182       // Get element size if CurStrides is empty.
8183       if (CurStrides.empty()) {
8184         const Type *ElementType = nullptr;
8185         if (CAT)
8186           ElementType = CAT->getElementType().getTypePtr();
8187         else if (VAT)
8188           ElementType = VAT->getElementType().getTypePtr();
8189         else
8190           assert(&Component == &*Components.begin() &&
8191                  "Only expect pointer (non CAT or VAT) when this is the "
8192                  "first Component");
8193         // If ElementType is null, then it means the base is a pointer
8194         // (neither CAT nor VAT) and we'll attempt to get ElementType again
8195         // for next iteration.
8196         if (ElementType) {
8197           // For the case that having pointer as base, we need to remove one
8198           // level of indirection.
8199           if (&Component != &*Components.begin())
8200             ElementType = ElementType->getPointeeOrArrayElementType();
8201           ElementTypeSize =
8202               Context.getTypeSizeInChars(ElementType).getQuantity();
8203           CurStrides.push_back(
8204               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8205         }
8206       }
8207       // Get dimension value except for the last dimension since we don't need
8208       // it.
8209       if (DimSizes.size() < Components.size() - 1) {
8210         if (CAT)
8211           DimSizes.push_back(llvm::ConstantInt::get(
8212               CGF.Int64Ty, CAT->getSize().getZExtValue()));
8213         else if (VAT)
8214           DimSizes.push_back(CGF.Builder.CreateIntCast(
8215               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8216               /*IsSigned=*/false));
8217       }
8218     }
8219 
8220     // Skip the dummy dimension since we have already have its information.
8221     auto *DI = DimSizes.begin() + 1;
8222     // Product of dimension.
8223     llvm::Value *DimProd =
8224         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8225 
8226     // Collect info for non-contiguous. Notice that offset, count, and stride
8227     // are only meaningful for array-section, so we insert a null for anything
8228     // other than array-section.
8229     // Also, the size of offset, count, and stride are not the same as
8230     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8231     // count, and stride are the same as the number of non-contiguous
8232     // declaration in target update to/from clause.
8233     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8234          Components) {
8235       const Expr *AssocExpr = Component.getAssociatedExpression();
8236 
8237       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8238         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8239             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8240             /*isSigned=*/false);
8241         CurOffsets.push_back(Offset);
8242         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8243         CurStrides.push_back(CurStrides.back());
8244         continue;
8245       }
8246 
8247       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8248 
8249       if (!OASE)
8250         continue;
8251 
8252       // Offset
8253       const Expr *OffsetExpr = OASE->getLowerBound();
8254       llvm::Value *Offset = nullptr;
8255       if (!OffsetExpr) {
8256         // If offset is absent, then we just set it to zero.
8257         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8258       } else {
8259         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8260                                            CGF.Int64Ty,
8261                                            /*isSigned=*/false);
8262       }
8263       CurOffsets.push_back(Offset);
8264 
8265       // Count
8266       const Expr *CountExpr = OASE->getLength();
8267       llvm::Value *Count = nullptr;
8268       if (!CountExpr) {
8269         // In Clang, once a high dimension is an array section, we construct all
8270         // the lower dimension as array section, however, for case like
8271         // arr[0:2][2], Clang construct the inner dimension as an array section
8272         // but it actually is not in an array section form according to spec.
8273         if (!OASE->getColonLocFirst().isValid() &&
8274             !OASE->getColonLocSecond().isValid()) {
8275           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8276         } else {
8277           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8278           // When the length is absent it defaults to ⌈(size −
8279           // lower-bound)/stride⌉, where size is the size of the array
8280           // dimension.
8281           const Expr *StrideExpr = OASE->getStride();
8282           llvm::Value *Stride =
8283               StrideExpr
8284                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8285                                               CGF.Int64Ty, /*isSigned=*/false)
8286                   : nullptr;
8287           if (Stride)
8288             Count = CGF.Builder.CreateUDiv(
8289                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8290           else
8291             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8292         }
8293       } else {
8294         Count = CGF.EmitScalarExpr(CountExpr);
8295       }
8296       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8297       CurCounts.push_back(Count);
8298 
8299       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8300       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8301       //              Offset      Count     Stride
8302       //    D0          0           1         4    (int)    <- dummy dimension
8303       //    D1          0           2         8    (2 * (1) * 4)
8304       //    D2          1           2         20   (1 * (1 * 5) * 4)
8305       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8306       const Expr *StrideExpr = OASE->getStride();
8307       llvm::Value *Stride =
8308           StrideExpr
8309               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8310                                           CGF.Int64Ty, /*isSigned=*/false)
8311               : nullptr;
8312       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8313       if (Stride)
8314         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8315       else
8316         CurStrides.push_back(DimProd);
8317       if (DI != DimSizes.end())
8318         ++DI;
8319     }
8320 
8321     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8322     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8323     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8324   }
8325 
8326   /// Return the adjusted map modifiers if the declaration a capture refers to
8327   /// appears in a first-private clause. This is expected to be used only with
8328   /// directives that start with 'target'.
8329   MappableExprsHandler::OpenMPOffloadMappingFlags
8330   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8331     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8332 
8333     // A first private variable captured by reference will use only the
8334     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8335     // declaration is known as first-private in this handler.
8336     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8337       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8338         return MappableExprsHandler::OMP_MAP_TO |
8339                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8340       return MappableExprsHandler::OMP_MAP_PRIVATE |
8341              MappableExprsHandler::OMP_MAP_TO;
8342     }
8343     auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8344     if (I != LambdasMap.end())
8345       // for map(to: lambda): using user specified map type.
8346       return getMapTypeBits(
8347           I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8348           /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
8349           /*AddPtrFlag=*/false,
8350           /*AddIsTargetParamFlag=*/false,
8351           /*isNonContiguous=*/false);
8352     return MappableExprsHandler::OMP_MAP_TO |
8353            MappableExprsHandler::OMP_MAP_FROM;
8354   }
8355 
8356   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8357     // Rotate by getFlagMemberOffset() bits.
8358     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8359                                                   << getFlagMemberOffset());
8360   }
8361 
8362   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8363                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8364     // If the entry is PTR_AND_OBJ but has not been marked with the special
8365     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8366     // marked as MEMBER_OF.
8367     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8368         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8369       return;
8370 
8371     // Reset the placeholder value to prepare the flag for the assignment of the
8372     // proper MEMBER_OF value.
8373     Flags &= ~OMP_MAP_MEMBER_OF;
8374     Flags |= MemberOfFlag;
8375   }
8376 
8377   void getPlainLayout(const CXXRecordDecl *RD,
8378                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8379                       bool AsBase) const {
8380     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8381 
8382     llvm::StructType *St =
8383         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8384 
8385     unsigned NumElements = St->getNumElements();
8386     llvm::SmallVector<
8387         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8388         RecordLayout(NumElements);
8389 
8390     // Fill bases.
8391     for (const auto &I : RD->bases()) {
8392       if (I.isVirtual())
8393         continue;
8394       const auto *Base = I.getType()->getAsCXXRecordDecl();
8395       // Ignore empty bases.
8396       if (Base->isEmpty() || CGF.getContext()
8397                                  .getASTRecordLayout(Base)
8398                                  .getNonVirtualSize()
8399                                  .isZero())
8400         continue;
8401 
8402       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8403       RecordLayout[FieldIndex] = Base;
8404     }
8405     // Fill in virtual bases.
8406     for (const auto &I : RD->vbases()) {
8407       const auto *Base = I.getType()->getAsCXXRecordDecl();
8408       // Ignore empty bases.
8409       if (Base->isEmpty())
8410         continue;
8411       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8412       if (RecordLayout[FieldIndex])
8413         continue;
8414       RecordLayout[FieldIndex] = Base;
8415     }
8416     // Fill in all the fields.
8417     assert(!RD->isUnion() && "Unexpected union.");
8418     for (const auto *Field : RD->fields()) {
8419       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8420       // will fill in later.)
8421       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8422         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8423         RecordLayout[FieldIndex] = Field;
8424       }
8425     }
8426     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8427              &Data : RecordLayout) {
8428       if (Data.isNull())
8429         continue;
8430       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8431         getPlainLayout(Base, Layout, /*AsBase=*/true);
8432       else
8433         Layout.push_back(Data.get<const FieldDecl *>());
8434     }
8435   }
8436 
8437   /// Generate all the base pointers, section pointers, sizes, map types, and
8438   /// mappers for the extracted mappable expressions (all included in \a
8439   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8440   /// pair of the relevant declaration and index where it occurs is appended to
8441   /// the device pointers info array.
8442   void generateAllInfoForClauses(
8443       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8444       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8445           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8446     // We have to process the component lists that relate with the same
8447     // declaration in a single chunk so that we can generate the map flags
8448     // correctly. Therefore, we organize all lists in a map.
8449     enum MapKind { Present, Allocs, Other, Total };
8450     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8451                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8452         Info;
8453 
8454     // Helper function to fill the information map for the different supported
8455     // clauses.
8456     auto &&InfoGen =
8457         [&Info, &SkipVarSet](
8458             const ValueDecl *D, MapKind Kind,
8459             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8460             OpenMPMapClauseKind MapType,
8461             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8462             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8463             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8464             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8465           if (SkipVarSet.contains(D))
8466             return;
8467           auto It = Info.find(D);
8468           if (It == Info.end())
8469             It = Info
8470                      .insert(std::make_pair(
8471                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8472                      .first;
8473           It->second[Kind].emplace_back(
8474               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8475               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8476         };
8477 
8478     for (const auto *Cl : Clauses) {
8479       const auto *C = dyn_cast<OMPMapClause>(Cl);
8480       if (!C)
8481         continue;
8482       MapKind Kind = Other;
8483       if (llvm::is_contained(C->getMapTypeModifiers(),
8484                              OMPC_MAP_MODIFIER_present))
8485         Kind = Present;
8486       else if (C->getMapType() == OMPC_MAP_alloc)
8487         Kind = Allocs;
8488       const auto *EI = C->getVarRefs().begin();
8489       for (const auto L : C->component_lists()) {
8490         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8491         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8492                 C->getMapTypeModifiers(), llvm::None,
8493                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8494                 E);
8495         ++EI;
8496       }
8497     }
8498     for (const auto *Cl : Clauses) {
8499       const auto *C = dyn_cast<OMPToClause>(Cl);
8500       if (!C)
8501         continue;
8502       MapKind Kind = Other;
8503       if (llvm::is_contained(C->getMotionModifiers(),
8504                              OMPC_MOTION_MODIFIER_present))
8505         Kind = Present;
8506       const auto *EI = C->getVarRefs().begin();
8507       for (const auto L : C->component_lists()) {
8508         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8509                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8510                 C->isImplicit(), std::get<2>(L), *EI);
8511         ++EI;
8512       }
8513     }
8514     for (const auto *Cl : Clauses) {
8515       const auto *C = dyn_cast<OMPFromClause>(Cl);
8516       if (!C)
8517         continue;
8518       MapKind Kind = Other;
8519       if (llvm::is_contained(C->getMotionModifiers(),
8520                              OMPC_MOTION_MODIFIER_present))
8521         Kind = Present;
8522       const auto *EI = C->getVarRefs().begin();
8523       for (const auto L : C->component_lists()) {
8524         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8525                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8526                 C->isImplicit(), std::get<2>(L), *EI);
8527         ++EI;
8528       }
8529     }
8530 
8531     // Look at the use_device_ptr clause information and mark the existing map
8532     // entries as such. If there is no map information for an entry in the
8533     // use_device_ptr list, we create one with map type 'alloc' and zero size
8534     // section. It is the user fault if that was not mapped before. If there is
8535     // no map information and the pointer is a struct member, then we defer the
8536     // emission of that entry until the whole struct has been processed.
8537     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8538                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8539         DeferredInfo;
8540     MapCombinedInfoTy UseDevicePtrCombinedInfo;
8541 
8542     for (const auto *Cl : Clauses) {
8543       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8544       if (!C)
8545         continue;
8546       for (const auto L : C->component_lists()) {
8547         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8548             std::get<1>(L);
8549         assert(!Components.empty() &&
8550                "Not expecting empty list of components!");
8551         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8552         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8553         const Expr *IE = Components.back().getAssociatedExpression();
8554         // If the first component is a member expression, we have to look into
8555         // 'this', which maps to null in the map of map information. Otherwise
8556         // look directly for the information.
8557         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8558 
8559         // We potentially have map information for this declaration already.
8560         // Look for the first set of components that refer to it.
8561         if (It != Info.end()) {
8562           bool Found = false;
8563           for (auto &Data : It->second) {
8564             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8565               return MI.Components.back().getAssociatedDeclaration() == VD;
8566             });
8567             // If we found a map entry, signal that the pointer has to be
8568             // returned and move on to the next declaration. Exclude cases where
8569             // the base pointer is mapped as array subscript, array section or
8570             // array shaping. The base address is passed as a pointer to base in
8571             // this case and cannot be used as a base for use_device_ptr list
8572             // item.
8573             if (CI != Data.end()) {
8574               auto PrevCI = std::next(CI->Components.rbegin());
8575               const auto *VarD = dyn_cast<VarDecl>(VD);
8576               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8577                   isa<MemberExpr>(IE) ||
8578                   !VD->getType().getNonReferenceType()->isPointerType() ||
8579                   PrevCI == CI->Components.rend() ||
8580                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8581                   VarD->hasLocalStorage()) {
8582                 CI->ReturnDevicePointer = true;
8583                 Found = true;
8584                 break;
8585               }
8586             }
8587           }
8588           if (Found)
8589             continue;
8590         }
8591 
8592         // We didn't find any match in our map information - generate a zero
8593         // size array section - if the pointer is a struct member we defer this
8594         // action until the whole struct has been processed.
8595         if (isa<MemberExpr>(IE)) {
8596           // Insert the pointer into Info to be processed by
8597           // generateInfoForComponentList. Because it is a member pointer
8598           // without a pointee, no entry will be generated for it, therefore
8599           // we need to generate one after the whole struct has been processed.
8600           // Nonetheless, generateInfoForComponentList must be called to take
8601           // the pointer into account for the calculation of the range of the
8602           // partial struct.
8603           InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8604                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8605                   nullptr);
8606           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8607         } else {
8608           llvm::Value *Ptr =
8609               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8610           UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8611           UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8612           UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8613           UseDevicePtrCombinedInfo.Sizes.push_back(
8614               llvm::Constant::getNullValue(CGF.Int64Ty));
8615           UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8616           UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8617         }
8618       }
8619     }
8620 
8621     // Look at the use_device_addr clause information and mark the existing map
8622     // entries as such. If there is no map information for an entry in the
8623     // use_device_addr list, we create one with map type 'alloc' and zero size
8624     // section. It is the user fault if that was not mapped before. If there is
8625     // no map information and the pointer is a struct member, then we defer the
8626     // emission of that entry until the whole struct has been processed.
8627     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8628     for (const auto *Cl : Clauses) {
8629       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8630       if (!C)
8631         continue;
8632       for (const auto L : C->component_lists()) {
8633         assert(!std::get<1>(L).empty() &&
8634                "Not expecting empty list of components!");
8635         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8636         if (!Processed.insert(VD).second)
8637           continue;
8638         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8639         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8640         // If the first component is a member expression, we have to look into
8641         // 'this', which maps to null in the map of map information. Otherwise
8642         // look directly for the information.
8643         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8644 
8645         // We potentially have map information for this declaration already.
8646         // Look for the first set of components that refer to it.
8647         if (It != Info.end()) {
8648           bool Found = false;
8649           for (auto &Data : It->second) {
8650             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8651               return MI.Components.back().getAssociatedDeclaration() == VD;
8652             });
8653             // If we found a map entry, signal that the pointer has to be
8654             // returned and move on to the next declaration.
8655             if (CI != Data.end()) {
8656               CI->ReturnDevicePointer = true;
8657               Found = true;
8658               break;
8659             }
8660           }
8661           if (Found)
8662             continue;
8663         }
8664 
8665         // We didn't find any match in our map information - generate a zero
8666         // size array section - if the pointer is a struct member we defer this
8667         // action until the whole struct has been processed.
8668         if (isa<MemberExpr>(IE)) {
8669           // Insert the pointer into Info to be processed by
8670           // generateInfoForComponentList. Because it is a member pointer
8671           // without a pointee, no entry will be generated for it, therefore
8672           // we need to generate one after the whole struct has been processed.
8673           // Nonetheless, generateInfoForComponentList must be called to take
8674           // the pointer into account for the calculation of the range of the
8675           // partial struct.
8676           InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8677                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8678                   nullptr, nullptr, /*ForDeviceAddr=*/true);
8679           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8680         } else {
8681           llvm::Value *Ptr;
8682           if (IE->isGLValue())
8683             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8684           else
8685             Ptr = CGF.EmitScalarExpr(IE);
8686           CombinedInfo.Exprs.push_back(VD);
8687           CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8688           CombinedInfo.Pointers.push_back(Ptr);
8689           CombinedInfo.Sizes.push_back(
8690               llvm::Constant::getNullValue(CGF.Int64Ty));
8691           CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8692           CombinedInfo.Mappers.push_back(nullptr);
8693         }
8694       }
8695     }
8696 
8697     for (const auto &Data : Info) {
8698       StructRangeInfoTy PartialStruct;
8699       // Temporary generated information.
8700       MapCombinedInfoTy CurInfo;
8701       const Decl *D = Data.first;
8702       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8703       for (const auto &M : Data.second) {
8704         for (const MapInfo &L : M) {
8705           assert(!L.Components.empty() &&
8706                  "Not expecting declaration with no component lists.");
8707 
8708           // Remember the current base pointer index.
8709           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8710           CurInfo.NonContigInfo.IsNonContiguous =
8711               L.Components.back().isNonContiguous();
8712           generateInfoForComponentList(
8713               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8714               CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8715               L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8716 
8717           // If this entry relates with a device pointer, set the relevant
8718           // declaration and add the 'return pointer' flag.
8719           if (L.ReturnDevicePointer) {
8720             assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8721                    "Unexpected number of mapped base pointers.");
8722 
8723             const ValueDecl *RelevantVD =
8724                 L.Components.back().getAssociatedDeclaration();
8725             assert(RelevantVD &&
8726                    "No relevant declaration related with device pointer??");
8727 
8728             CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8729                 RelevantVD);
8730             CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8731           }
8732         }
8733       }
8734 
8735       // Append any pending zero-length pointers which are struct members and
8736       // used with use_device_ptr or use_device_addr.
8737       auto CI = DeferredInfo.find(Data.first);
8738       if (CI != DeferredInfo.end()) {
8739         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8740           llvm::Value *BasePtr;
8741           llvm::Value *Ptr;
8742           if (L.ForDeviceAddr) {
8743             if (L.IE->isGLValue())
8744               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8745             else
8746               Ptr = this->CGF.EmitScalarExpr(L.IE);
8747             BasePtr = Ptr;
8748             // Entry is RETURN_PARAM. Also, set the placeholder value
8749             // MEMBER_OF=FFFF so that the entry is later updated with the
8750             // correct value of MEMBER_OF.
8751             CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8752           } else {
8753             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8754             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8755                                              L.IE->getExprLoc());
8756             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8757             // placeholder value MEMBER_OF=FFFF so that the entry is later
8758             // updated with the correct value of MEMBER_OF.
8759             CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8760                                     OMP_MAP_MEMBER_OF);
8761           }
8762           CurInfo.Exprs.push_back(L.VD);
8763           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8764           CurInfo.Pointers.push_back(Ptr);
8765           CurInfo.Sizes.push_back(
8766               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8767           CurInfo.Mappers.push_back(nullptr);
8768         }
8769       }
8770       // If there is an entry in PartialStruct it means we have a struct with
8771       // individual members mapped. Emit an extra combined entry.
8772       if (PartialStruct.Base.isValid()) {
8773         CurInfo.NonContigInfo.Dims.push_back(0);
8774         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8775       }
8776 
8777       // We need to append the results of this capture to what we already
8778       // have.
8779       CombinedInfo.append(CurInfo);
8780     }
8781     // Append data for use_device_ptr clauses.
8782     CombinedInfo.append(UseDevicePtrCombinedInfo);
8783   }
8784 
8785 public:
8786   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8787       : CurDir(&Dir), CGF(CGF) {
8788     // Extract firstprivate clause information.
8789     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8790       for (const auto *D : C->varlists())
8791         FirstPrivateDecls.try_emplace(
8792             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8793     // Extract implicit firstprivates from uses_allocators clauses.
8794     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8795       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8796         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8797         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8798           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8799                                         /*Implicit=*/true);
8800         else if (const auto *VD = dyn_cast<VarDecl>(
8801                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8802                          ->getDecl()))
8803           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8804       }
8805     }
8806     // Extract device pointer clause information.
8807     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8808       for (auto L : C->component_lists())
8809         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8810     // Extract map information.
8811     for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8812       if (C->getMapType() != OMPC_MAP_to)
8813         continue;
8814       for (auto L : C->component_lists()) {
8815         const ValueDecl *VD = std::get<0>(L);
8816         const auto *RD = VD ? VD->getType()
8817                                   .getCanonicalType()
8818                                   .getNonReferenceType()
8819                                   ->getAsCXXRecordDecl()
8820                             : nullptr;
8821         if (RD && RD->isLambda())
8822           LambdasMap.try_emplace(std::get<0>(L), C);
8823       }
8824     }
8825   }
8826 
  /// Constructor for the declare mapper directive. Unlike the
  /// executable-directive constructor, no clause pre-scanning is performed
  /// here; only the current directive pointer is recorded.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8830 
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  ///
  /// \param CombinedInfo The accumulated map info; one combined parent entry
  ///        is appended covering the mapped range of the struct.
  /// \param CurTypes Map-type flags of the individual member entries; they are
  ///        rewritten in place (TARGET_PARAM removed, MEMBER_OF set, and
  ///        OMPX_HOLD propagated when present on any member).
  /// \param PartialStruct Range information (base, lowest/highest element)
  ///        gathered while mapping individual members.
  /// \param VD The mapped declaration, if any, recorded for diagnostics.
  /// \param NotTargetParams If false, the combined entry is a kernel argument
  ///        and gets the TARGET_PARAM flag.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    // A single entry that is neither MEMBER_OF anything nor an array section
    // does not need a combined parent entry.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    // For a fully mapped record both bounds collapse to the record base.
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element),
    // computed as an i8* pointer difference and zero-extended to i64.
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr =
        CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // The combined entry is TARGET_PARAM only when generating info for
    // captures (NotTargetParams == false); otherwise it carries no flags.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element; the combined entry
    // (when requested) carries it instead.
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement.  Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_OMPX_HOLD;
        })) {
      CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }
8902 
8903   /// Generate all the base pointers, section pointers, sizes, map types, and
8904   /// mappers for the extracted mappable expressions (all included in \a
8905   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8906   /// pair of the relevant declaration and index where it occurs is appended to
8907   /// the device pointers info array.
8908   void generateAllInfo(
8909       MapCombinedInfoTy &CombinedInfo,
8910       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8911           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8912     assert(CurDir.is<const OMPExecutableDirective *>() &&
8913            "Expect a executable directive");
8914     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8915     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
8916   }
8917 
8918   /// Generate all the base pointers, section pointers, sizes, map types, and
8919   /// mappers for the extracted map clauses of user-defined mapper (all included
8920   /// in \a CombinedInfo).
8921   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
8922     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8923            "Expect a declare mapper directive");
8924     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8925     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
8926   }
8927 
  /// Emit capture info for lambdas for variables captured by reference.
  ///
  /// \param VD The captured declaration whose type may be a lambda closure.
  /// \param Arg The device-side pointer value for \p VD.
  /// \param CombinedInfo Receives one PTR_AND_OBJ|LITERAL|MEMBER_OF|IMPLICIT
  ///        entry per by-reference (or pointer) capture, plus one for the
  ///        captured 'this' if present.
  /// \param LambdaPointers Maps each emitted capture-field address back to the
  ///        lambda object address, so MEMBER_OF indices can be fixed up later
  ///        by adjustMemberOfForLambdaCaptures.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
    const auto *RD = VDType->getAsCXXRecordDecl();
    // Only lambda closures need this treatment.
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
                   CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    // Captured 'this' is mapped as a pointer-sized PTR_AND_OBJ entry.
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and captured pointers are emitted.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: map the referenced object with its full size.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Captured pointer: map the loaded pointer value with zero size.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }
8992 
8993   /// Set correct indices for lambdas captures.
8994   void adjustMemberOfForLambdaCaptures(
8995       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8996       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8997       MapFlagsArrayTy &Types) const {
8998     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8999       // Set correct member_of idx for all implicit lambda captures.
9000       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9001                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9002         continue;
9003       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9004       assert(BasePtr && "Unable to find base lambda address.");
9005       int TgtIdx = -1;
9006       for (unsigned J = I; J > 0; --J) {
9007         unsigned Idx = J - 1;
9008         if (Pointers[Idx] != BasePtr)
9009           continue;
9010         TgtIdx = Idx;
9011         break;
9012       }
9013       assert(TgtIdx != -1 && "Unable to find parent lambda.");
9014       // All other current entries will be MEMBER_OF the combined entry
9015       // (except for PTR_AND_OBJ entries which do not have a placeholder value
9016       // 0xFFFF in the MEMBER_OF field).
9017       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9018       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9019     }
9020   }
9021 
  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  ///
  /// \param Cap The capture ('this' or a variable) to generate map info for.
  /// \param Arg The corresponding kernel argument value.
  /// \param CombinedInfo Receives the generated entries.
  /// \param PartialStruct Filled with range info when only parts of a struct
  ///        are mapped, so the caller can emit a combined parent entry.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we generating information for the first component
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // for map(to: lambda): skip here, processing it in
    // generateDefaultMapInfo
    if (LambdasMap.count(VD))
      return;

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    // Collect every map-clause component list that refers to this capture's
    // declaration, together with the clause's type, modifiers, implicitness,
    // mapper, and (for explicit maps) the variable reference expression.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    // Process 'present' and 'alloc' maps first so they take precedence when
    // component lists overlap; stable sort keeps clause order otherwise.
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    // Compare every pair of component lists, walking both from the innermost
    // component outwards; reaching the head of one list means that list is a
    // prefix of (i.e. overlaps) the other.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          // Same, if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            continue;
          // The shorter list is the base; the longer one is recorded as an
          // element overlapped by that base.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Strip pointers/arrays down to the underlying record type to obtain
      // its field layout order.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    // Order each base's overlapped lists by declaration/layout order so they
    // are emitted in the order the fields appear in the record.
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            // Fields of different (base) records: whichever appears first in
            // the flattened layout is less.
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }
9259 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Pushes exactly one entry onto each of the parallel vectors in
  /// \a CombinedInfo (Exprs, BasePointers, Pointers, Sizes, Types, Mappers)
  /// describing the default mapping for the capture, which is one of three
  /// kinds: the 'this' pointer, a by-copy capture, or a by-reference capture.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    // Treat the map as implicit unless FirstPrivateDecls records otherwise
    // for the captured variable (checked in the by-copy/by-reference cases).
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this' capture: map the pointed-to object with 'tofrom' semantics;
      // the size is the size of the pointee of the record field's type.
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // A firstprivate clause on the directive may override the implicit
      // flag for this variable.
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      // By-reference capture: the field type is a reference; map the
      // referenced object with the size of its element type.
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        // Firstprivate pointer: load the pointer value through the reference
        // and pass the dereferenced pointer rather than the reference itself.
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
9332 };
9333 } // anonymous namespace
9334 
/// For each non-contiguous mapped component, emit a local array of
/// descriptor_dim structs (offset/count/stride per dimension) and store its
/// address into the corresponding slot of the offload pointers array, where
/// the runtime expects it for non-contiguous transfers.
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //  uint64_t offset;
  //  uint64_t count;
  //  uint64_t stride;
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  // Field indices into descriptor_dim, in declaration order.
  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variable here since the size of "Dims" is the same as the
  // size of Components, however, the size of offset, count, and stride is equal
  // to the size of base declaration that is non-contiguous.
  // I walks all components; L counts only the non-contiguous ones emitted.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting ir if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    // Allocate a stack array "dims" with one descriptor_dim per dimension.
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      // The recorded offset/count/stride values are written out in reverse
      // order of how they were collected.
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy, CGM.Int8Ty);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr(P, CGM.VoidPtrTy, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    ++L;
  }
}
9402 
9403 // Try to extract the base declaration from a `this->x` expression if possible.
9404 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9405   if (!E)
9406     return nullptr;
9407 
9408   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
9409     if (const MemberExpr *ME =
9410             dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9411       return ME->getMemberDecl();
9412   return nullptr;
9413 }
9414 
9415 /// Emit a string constant containing the names of the values mapped to the
9416 /// offloading runtime library.
9417 llvm::Constant *
9418 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9419                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9420 
9421   uint32_t SrcLocStrSize;
9422   if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9423     return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
9424 
9425   SourceLocation Loc;
9426   if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9427     if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9428       Loc = VD->getLocation();
9429     else
9430       Loc = MapExprs.getMapExpr()->getExprLoc();
9431   } else {
9432     Loc = MapExprs.getMapDecl()->getLocation();
9433   }
9434 
9435   std::string ExprName;
9436   if (MapExprs.getMapExpr()) {
9437     PrintingPolicy P(CGF.getContext().getLangOpts());
9438     llvm::raw_string_ostream OS(ExprName);
9439     MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9440     OS.flush();
9441   } else {
9442     ExprName = MapExprs.getMapDecl()->getNameAsString();
9443   }
9444 
9445   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9446   return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
9447                                          PLoc.getLine(), PLoc.getColumn(),
9448                                          SrcLocStrSize);
9449 }
9450 
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// Populates \p Info with: stack arrays for base pointers, pointers and
/// mappers; a sizes array that is a constant global, a stack array, or a
/// mix (constant global memcpy'd into a stack buffer, runtime slots stored
/// individually); and constant globals for map types and (when debug info is
/// requested) map names.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // Stack arrays with one void* slot per mapped entity.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    // ConstSizes[I] holds the compile-time-constant size of entry I (or 0);
    // RuntimeSizes marks the entries that must be stored at run time.
    SmallVector<llvm::Constant *> ConstSizes(
        CombinedInfo.Sizes.size(), llvm::ConstantInt::get(CGF.Int64Ty, 0));
    llvm::SmallBitVector RuntimeSizes(CombinedInfo.Sizes.size());
    for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
      if (auto *CI = dyn_cast<llvm::Constant>(CombinedInfo.Sizes[I])) {
        if (!isa<llvm::ConstantExpr>(CI) && !isa<llvm::GlobalValue>(CI)) {
          // For a non-contiguous entry, the size slot carries the number of
          // dimensions rather than a byte count.
          if (IsNonContiguous && (CombinedInfo.Types[I] &
                                  MappableExprsHandler::OMP_MAP_NON_CONTIG))
            ConstSizes[I] = llvm::ConstantInt::get(
                CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]);
          else
            ConstSizes[I] = CI;
          continue;
        }
      }
      RuntimeSizes.set(I);
    }

    if (RuntimeSizes.all()) {
      // All sizes are runtime-evaluated: a plain stack array is filled in the
      // per-entity loop below.
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // At least one size is a compile-time constant: emit the constants as a
      // private unnamed_addr global.
      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage, SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      if (RuntimeSizes.any()) {
        // Mixed case: memcpy the constant global into a stack buffer, then
        // the per-entity loop below overwrites the runtime-evaluated slots.
        QualType SizeArrayType = Ctx.getConstantArrayType(
            Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
            /*IndexTypeQuals=*/0);
        Address Buffer = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes");
        llvm::Value *GblConstPtr =
            CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                SizesArrayGbl, CGM.Int64Ty->getPointerTo());
        CGF.Builder.CreateMemCpy(
            Buffer,
            Address(GblConstPtr, CGM.Int64Ty,
                    CGM.getNaturalTypeAlignment(Ctx.getIntTypeForBitwidth(
                        /*DestWidth=*/64, /*Signed=*/false))),
            CGF.getTypeSize(SizeArrayType));
        Info.SizesArray = Buffer.getPointer();
      } else {
        // All sizes are constant: point directly at the global.
        Info.SizesArray = SizesArrayGbl;
      }
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      // Build one source-location/name string per mapped expression.
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Fill the per-entity slots of the base-pointer, pointer, size (runtime
    // entries only) and mapper arrays.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, BPVal->getType(),
                     Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record where device-pointer captures were stored so the declarations
      // can be found again later via CaptureDeviceAddrMap.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, PVal->getType(), Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (RuntimeSizes.test(I)) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, CGM.Int64Ty, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  // Finally, emit the per-dimension descriptors needed by non-contiguous
  // transfers, if any.
  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}
9641 
namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  /// When true, emit the map types for the end of the region instead of the
  /// beginning.
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  ArgumentsOptions(bool IsForEndCall) : ForEndCall(IsForEndCall) {}
};
} // namespace
9650 
/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers.  If
/// ForEndCall, emit map types to be passed for the end of the region instead of
/// the beginning.
///
/// Each out-parameter receives either a pointer to the first element of the
/// corresponding array in \p Info, or a null pointer when there are no mapped
/// entities (or no user-defined mapper / no debug info for the optional
/// arrays).
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
    llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
    const ArgumentsOptions &Options = ArgumentsOptions()) {
  assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
         "expected region end call to runtime only when end call is separate");
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    // Decay each array to a pointer to its first element (GEP 0, 0).
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    // A separate end-of-region map-type array exists only when a 'present'
    // modifier had to be stripped; prefer it for end calls.
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
                                                    : Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);

    // Only emit the mapper information arrays if debug information is
    // requested.
    if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
      MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.MapNamesArray,
          /*Idx0=*/0,
          /*Idx1=*/0);
    // If there is no user-defined mapper, set the mapper array to nullptr to
    // avoid an unnecessary data privatization
    if (!Info.HasMapper)
      MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MappersArrayArg =
          CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
  } else {
    // Nothing is mapped: pass null for every array argument.
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
  }
}
9711 
/// Check for inner distribute directive.
///
/// Returns the distribute directive nested directly inside \p D (for
/// OMPD_target, possibly one level deeper through an intermediate 'teams'
/// directive), or nullptr when there is none.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  // The captured body must consist of a single nested statement.
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      // `target` wrapping a `teams` directive: look one level deeper for a
      // distribute directive inside the teams region.
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    // Combined target forms below either already contain their loop construct
    // or cannot enclose a distribute directive.
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    // All remaining kinds fall through to llvm_unreachable: this helper is
    // only expected to be called for the target directives handled above.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
9821 
9822 /// Emit the user-defined mapper function. The code generation follows the
9823 /// pattern in the example below.
9824 /// \code
9825 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9826 ///                                           void *base, void *begin,
9827 ///                                           int64_t size, int64_t type,
9828 ///                                           void *name = nullptr) {
9829 ///   // Allocate space for an array section first or add a base/begin for
9830 ///   // pointer dereference.
9831 ///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9832 ///       !maptype.IsDelete)
9833 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9834 ///                                 size*sizeof(Ty), clearToFromMember(type));
9835 ///   // Map members.
9836 ///   for (unsigned i = 0; i < size; i++) {
9837 ///     // For each component specified by this mapper:
9838 ///     for (auto c : begin[i]->all_components) {
9839 ///       if (c.hasMapper())
9840 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9841 ///                       c.arg_type, c.arg_name);
9842 ///       else
9843 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9844 ///                                     c.arg_begin, c.arg_size, c.arg_type,
9845 ///                                     c.arg_name);
9846 ///     }
9847 ///   }
9848 ///   // Delete the array section.
9849 ///   if (size > 1 && maptype.IsDelete)
9850 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9851 ///                                 size*sizeof(Ty), clearToFromMember(type));
9852 /// }
9853 /// \endcode
9854 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9855                                             CodeGenFunction *CGF) {
9856   if (UDMMap.count(D) > 0)
9857     return;
9858   ASTContext &C = CGM.getContext();
9859   QualType Ty = D->getType();
9860   QualType PtrTy = C.getPointerType(Ty).withRestrict();
9861   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9862   auto *MapperVarDecl =
9863       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9864   SourceLocation Loc = D->getLocation();
9865   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9866   llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
9867 
9868   // Prepare mapper function arguments and attributes.
9869   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9870                               C.VoidPtrTy, ImplicitParamDecl::Other);
9871   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9872                             ImplicitParamDecl::Other);
9873   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9874                              C.VoidPtrTy, ImplicitParamDecl::Other);
9875   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9876                             ImplicitParamDecl::Other);
9877   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9878                             ImplicitParamDecl::Other);
9879   ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9880                             ImplicitParamDecl::Other);
9881   FunctionArgList Args;
9882   Args.push_back(&HandleArg);
9883   Args.push_back(&BaseArg);
9884   Args.push_back(&BeginArg);
9885   Args.push_back(&SizeArg);
9886   Args.push_back(&TypeArg);
9887   Args.push_back(&NameArg);
9888   const CGFunctionInfo &FnInfo =
9889       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9890   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9891   SmallString<64> TyStr;
9892   llvm::raw_svector_ostream Out(TyStr);
9893   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
9894   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9895   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9896                                     Name, &CGM.getModule());
9897   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9898   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9899   // Start the mapper function code generation.
9900   CodeGenFunction MapperCGF(CGM);
9901   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9902   // Compute the starting and end addresses of array elements.
9903   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9904       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9905       C.getPointerType(Int64Ty), Loc);
9906   // Prepare common arguments for array initiation and deletion.
9907   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9908       MapperCGF.GetAddrOfLocalVar(&HandleArg),
9909       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9910   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9911       MapperCGF.GetAddrOfLocalVar(&BaseArg),
9912       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9913   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9914       MapperCGF.GetAddrOfLocalVar(&BeginArg),
9915       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9916   // Convert the size in bytes into the number of array elements.
9917   Size = MapperCGF.Builder.CreateExactUDiv(
9918       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9919   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9920       BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
9921   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
9922   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9923       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9924       C.getPointerType(Int64Ty), Loc);
9925   llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
9926       MapperCGF.GetAddrOfLocalVar(&NameArg),
9927       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9928 
9929   // Emit array initiation if this is an array section and \p MapType indicates
9930   // that memory allocation is required.
9931   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9932   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9933                              MapName, ElementSize, HeadBB, /*IsInit=*/true);
9934 
9935   // Emit a for loop to iterate through SizeArg of elements and map all of them.
9936 
9937   // Emit the loop header block.
9938   MapperCGF.EmitBlock(HeadBB);
9939   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9940   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9941   // Evaluate whether the initial condition is satisfied.
9942   llvm::Value *IsEmpty =
9943       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9944   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9945   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9946 
9947   // Emit the loop body block.
9948   MapperCGF.EmitBlock(BodyBB);
9949   llvm::BasicBlock *LastBB = BodyBB;
9950   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9951       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9952   PtrPHI->addIncoming(PtrBegin, EntryBB);
9953   Address PtrCurrent(PtrPHI, ElemTy,
9954                      MapperCGF.GetAddrOfLocalVar(&BeginArg)
9955                          .getAlignment()
9956                          .alignmentOfArrayElement(ElementSize));
9957   // Privatize the declared variable of mapper to be the current array element.
9958   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9959   Scope.addPrivate(MapperVarDecl, PtrCurrent);
9960   (void)Scope.Privatize();
9961 
9962   // Get map clause information. Fill up the arrays with all mapped variables.
9963   MappableExprsHandler::MapCombinedInfoTy Info;
9964   MappableExprsHandler MEHandler(*D, MapperCGF);
9965   MEHandler.generateAllInfoForMapper(Info);
9966 
9967   // Call the runtime API __tgt_mapper_num_components to get the number of
9968   // pre-existing components.
9969   llvm::Value *OffloadingArgs[] = {Handle};
9970   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9971       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9972                                             OMPRTL___tgt_mapper_num_components),
9973       OffloadingArgs);
9974   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9975       PreviousSize,
9976       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9977 
9978   // Fill up the runtime mapper handle for all components.
9979   for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
9980     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9981         *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9982     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9983         Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9984     llvm::Value *CurSizeArg = Info.Sizes[I];
9985     llvm::Value *CurNameArg =
9986         (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9987             ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
9988             : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
9989 
9990     // Extract the MEMBER_OF field from the map type.
9991     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
9992     llvm::Value *MemberMapType =
9993         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9994 
9995     // Combine the map type inherited from user-defined mapper with that
9996     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9997     // bits of the \a MapType, which is the input argument of the mapper
9998     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9999     // bits of MemberMapType.
10000     // [OpenMP 5.0], 1.2.6. map-type decay.
10001     //        | alloc |  to   | from  | tofrom | release | delete
10002     // ----------------------------------------------------------
10003     // alloc  | alloc | alloc | alloc | alloc  | release | delete
10004     // to     | alloc |  to   | alloc |   to   | release | delete
10005     // from   | alloc | alloc | from  |  from  | release | delete
10006     // tofrom | alloc |  to   | from  | tofrom | release | delete
10007     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
10008         MapType,
10009         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
10010                                    MappableExprsHandler::OMP_MAP_FROM));
10011     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
10012     llvm::BasicBlock *AllocElseBB =
10013         MapperCGF.createBasicBlock("omp.type.alloc.else");
10014     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
10015     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
10016     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
10017     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
10018     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
10019     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
10020     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
10021     MapperCGF.EmitBlock(AllocBB);
10022     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
10023         MemberMapType,
10024         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
10025                                      MappableExprsHandler::OMP_MAP_FROM)));
10026     MapperCGF.Builder.CreateBr(EndBB);
10027     MapperCGF.EmitBlock(AllocElseBB);
10028     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
10029         LeftToFrom,
10030         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
10031     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
10032     // In case of to, clear OMP_MAP_FROM.
10033     MapperCGF.EmitBlock(ToBB);
10034     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
10035         MemberMapType,
10036         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
10037     MapperCGF.Builder.CreateBr(EndBB);
10038     MapperCGF.EmitBlock(ToElseBB);
10039     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
10040         LeftToFrom,
10041         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
10042     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
10043     // In case of from, clear OMP_MAP_TO.
10044     MapperCGF.EmitBlock(FromBB);
10045     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
10046         MemberMapType,
10047         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
10048     // In case of tofrom, do nothing.
10049     MapperCGF.EmitBlock(EndBB);
10050     LastBB = EndBB;
10051     llvm::PHINode *CurMapType =
10052         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
10053     CurMapType->addIncoming(AllocMapType, AllocBB);
10054     CurMapType->addIncoming(ToMapType, ToBB);
10055     CurMapType->addIncoming(FromMapType, FromBB);
10056     CurMapType->addIncoming(MemberMapType, ToElseBB);
10057 
10058     llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
10059                                      CurSizeArg, CurMapType, CurNameArg};
10060     if (Info.Mappers[I]) {
10061       // Call the corresponding mapper function.
10062       llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
10063           cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
10064       assert(MapperFunc && "Expect a valid mapper function is available.");
10065       MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
10066     } else {
10067       // Call the runtime API __tgt_push_mapper_component to fill up the runtime
10068       // data structure.
10069       MapperCGF.EmitRuntimeCall(
10070           OMPBuilder.getOrCreateRuntimeFunction(
10071               CGM.getModule(), OMPRTL___tgt_push_mapper_component),
10072           OffloadingArgs);
10073     }
10074   }
10075 
10076   // Update the pointer to point to the next element that needs to be mapped,
10077   // and check whether we have mapped all elements.
10078   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
10079       ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
10080   PtrPHI->addIncoming(PtrNext, LastBB);
10081   llvm::Value *IsDone =
10082       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
10083   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
10084   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
10085 
10086   MapperCGF.EmitBlock(ExitBB);
10087   // Emit array deletion if this is an array section and \p MapType indicates
10088   // that deletion is required.
10089   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
10090                              MapName, ElementSize, DoneBB, /*IsInit=*/false);
10091 
10092   // Emit the function exit block.
10093   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
10094   MapperCGF.FinishFunction();
10095   UDMMap.try_emplace(D, Fn);
10096   if (CGF) {
10097     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
10098     Decls.second.push_back(D);
10099   }
10100 }
10101 
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  // Used to distinguish the init and delete flavors in block/value names.
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  // For this purpose a section counts as an array when it has more than one
  // element (Size here is the element count, not a byte size).
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  // Isolate the OMP_MAP_DELETE bit from the dynamic map type argument.
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    // Take the init path for multi-element sections, or for PTR_AND_OBJ
    // entries whose base pointer differs from the begin pointer.
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    // Initialization only happens when the delete bit is NOT set.
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    // Deletion only happens when the delete bit IS set.
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
10169 
10170 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10171     const OMPDeclareMapperDecl *D) {
10172   auto I = UDMMap.find(D);
10173   if (I != UDMMap.end())
10174     return I->second;
10175   emitUserDefinedMapper(D);
10176   return UDMMap.lookup(D);
10177 }
10178 
10179 void CGOpenMPRuntime::emitTargetNumIterationsCall(
10180     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10181     llvm::Value *DeviceID,
10182     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10183                                      const OMPLoopDirective &D)>
10184         SizeEmitter) {
10185   OpenMPDirectiveKind Kind = D.getDirectiveKind();
10186   const OMPExecutableDirective *TD = &D;
10187   // Get nested teams distribute kind directive, if any.
10188   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
10189     TD = getNestedDistributeDirective(CGM.getContext(), D);
10190   if (!TD)
10191     return;
10192   const auto *LD = cast<OMPLoopDirective>(TD);
10193   auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
10194                                                          PrePostActionTy &) {
10195     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
10196       llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10197       llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
10198       CGF.EmitRuntimeCall(
10199           OMPBuilder.getOrCreateRuntimeFunction(
10200               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
10201           Args);
10202     }
10203   };
10204   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
10205 }
10206 
/// Emit the launch sequence for a target region: generate the offloading
/// argument arrays, call the appropriate __tgt_target* runtime entry point,
/// and emit the host fallback path for when offloading fails or no target
/// function ID is available.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  // -fopenmp-offload-mandatory on the host side: no host fallback is allowed,
  // so the failure path below emits 'unreachable' instead of calling the
  // outlined host function.
  const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsDevice &&
                                   CGM.getLangOpts().OpenMPOffloadMandatory;

  assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");

  // depend/nowait clauses require wrapping the target invocation in an outer
  // task (see EmitOMPTargetTaskBasedDirective calls below).
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // These are captured by reference in ThenGen/TargetThenGen below and are
  // filled in by TargetThenGen before ThenGen runs.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate code for the host fallback function.
  auto &&FallbackGen = [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask,
                        &CS, OffloadingMandatory](CodeGenFunction &CGF) {
    if (OffloadingMandatory) {
      CGF.Builder.CreateUnreachable();
    } else {
      if (RequiresOuterTask) {
        // Regenerate the captured variables inside the task context.
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    }
  };
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFnID, &D, &InputInfo, &MapTypesArray,
                    &MapNamesArray, SizeEmitter,
                    FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      FallbackGen(CGF);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");
    (void)OutlinedFnID;

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      SmallVector<llvm::Value *> OffloadingArgs = {
          RTLoc,
          DeviceID,
          OutlinedFnID,
          PointerNum,
          InputInfo.BasePointersArray.getPointer(),
          InputInfo.PointersArray.getPointer(),
          InputInfo.SizesArray.getPointer(),
          MapTypesArray,
          MapNamesArray,
          InputInfo.MappersArray.getPointer(),
          NumTeams,
          NumThreads};
      if (HasNowait) {
        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
      }
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      SmallVector<llvm::Value *> OffloadingArgs = {
          RTLoc,
          DeviceID,
          OutlinedFnID,
          PointerNum,
          InputInfo.BasePointersArray.getPointer(),
          InputInfo.PointersArray.getPointer(),
          InputInfo.SizesArray.getPointer(),
          MapTypesArray,
          MapNamesArray,
          InputInfo.MappersArray.getPointer()};
      if (HasNowait) {
        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
      }
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    FallbackGen(CGF);

    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
    FallbackGen(CGF);
  };

  // Generate all mapping information, fill the offloading arrays, and then
  // invoke ThenGen (wrapped in an outer task if depend/nowait is present).
  // This populates InputInfo/MapTypesArray/MapNamesArray, which ThenGen reads
  // through its by-reference captures.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    // Walk the captures, the captured-record fields, and the captured values
    // in lockstep.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto *CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CombinedInfo.append(PartialStruct.PreliminaryMapData);
        MEHandler.emitCombinedEntry(
            CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
            !PartialStruct.PreliminaryMapData.BasePointers.empty());
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});

    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
10539 
// Recursively scan \p S for OpenMP target execution directives and emit a
// device function for every target region found. \p ParentName is the mangled
// name of the enclosing function/ctor/dtor and is part of the unique kernel
// entry name.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // Compute the (device, file, line) triple that uniquely identifies this
    // target region's offload entry.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the directive-specific device-function emitter.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // None of the remaining directive kinds is a target execution directive
    // (the RequiresDeviceCodegen guard above excludes them), so reaching any
    // of these cases is a bug.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // A non-target executable directive: recurse into its raw associated
  // statement, if any.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
10690 
10691 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10692   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10693       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10694   if (!DevTy)
10695     return false;
10696   // Do not emit device_type(nohost) functions for the host.
10697   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10698     return true;
10699   // Do not emit device_type(host) functions for the device.
10700   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10701     return true;
10702   return false;
10703 }
10704 
10705 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10706   // If emitting code for the host, we do not process FD here. Instead we do
10707   // the normal code generation.
10708   if (!CGM.getLangOpts().OpenMPIsDevice) {
10709     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10710       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10711                                   CGM.getLangOpts().OpenMPIsDevice))
10712         return true;
10713     return false;
10714   }
10715 
10716   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10717   // Try to detect target regions in the function.
10718   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10719     StringRef Name = CGM.getMangledName(GD);
10720     scanForTargetRegionsFunctions(FD->getBody(), Name);
10721     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10722                                 CGM.getLangOpts().OpenMPIsDevice))
10723       return true;
10724   }
10725 
10726   // Do not to emit function if it is not marked as declare target.
10727   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10728          AlreadyEmittedTargetDecls.count(VD) == 0;
10729 }
10730 
10731 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10732   if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10733                               CGM.getLangOpts().OpenMPIsDevice))
10734     return true;
10735 
10736   if (!CGM.getLangOpts().OpenMPIsDevice)
10737     return false;
10738 
10739   // Check if there are Ctors/Dtors in this declaration and look for target
10740   // regions in it. We use the complete variant to produce the kernel name
10741   // mangling.
10742   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10743   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10744     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10745       StringRef ParentName =
10746           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10747       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10748     }
10749     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10750       StringRef ParentName =
10751           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10752       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10753     }
10754   }
10755 
10756   // Do not to emit variable if it is not marked as declare target.
10757   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10758       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10759           cast<VarDecl>(GD.getDecl()));
10760   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10761       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10762        HasRequiresUnifiedSharedMemory)) {
10763     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10764     return true;
10765   }
10766   return false;
10767 }
10768 
// Register the declare-target variable \p VD (emitted at \p Addr) with the
// offload entries table so host and device tables stay in sync. Non-target
// variables emitted in device code are recorded separately. No-op when there
// are no offload targets and this is a host compilation.
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  // If we have host/nohost variables, they do not need to be registered.
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
    return;

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    // Plain 'declare target to': register the variable itself with its real
    // size and linkage. A zero size marks a declaration-only entry.
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Do not create a "ref-variable" if the original is not also available
      // on the host.
      if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
        return;
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        // Emit a constant internal "<name>_ref" global pointing at the
        // variable and mark it compiler-used so the variable is kept alive.
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    // 'link' (or 'to' + unified memory) entries register the pointer-sized
    // indirection variable rather than the variable itself; on the device
    // only the name is recorded.
    if (CGM.getLangOpts().OpenMPIsDevice) {
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}
10850 
10851 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10852   if (isa<FunctionDecl>(GD.getDecl()) ||
10853       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10854     return emitTargetFunctions(GD);
10855 
10856   return emitTargetGlobalVariable(GD);
10857 }
10858 
10859 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10860   for (const VarDecl *VD : DeferredGlobalVariables) {
10861     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10862         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10863     if (!Res)
10864       continue;
10865     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10866         !HasRequiresUnifiedSharedMemory) {
10867       CGM.EmitGlobal(VD);
10868     } else {
10869       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10870               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10871                HasRequiresUnifiedSharedMemory)) &&
10872              "Expected link clause or to clause with unified memory.");
10873       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10874     }
10875   }
10876 }
10877 
10878 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10879     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10880   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10881          " Expected target-based directive.");
10882 }
10883 
10884 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10885   for (const OMPClause *Clause : D->clauselists()) {
10886     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10887       HasRequiresUnifiedSharedMemory = true;
10888     } else if (const auto *AC =
10889                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10890       switch (AC->getAtomicDefaultMemOrderKind()) {
10891       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10892         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10893         break;
10894       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10895         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10896         break;
10897       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10898         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10899         break;
10900       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10901         break;
10902       }
10903     }
10904   }
10905 }
10906 
// Returns the atomic ordering selected by a 'requires
// atomic_default_mem_order' clause (see processRequiresDirective).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10910 
10911 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10912                                                        LangAS &AS) {
10913   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10914     return false;
10915   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10916   switch(A->getAllocatorType()) {
10917   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10918   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10919   // Not supported, fallback to the default mem space.
10920   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10921   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10922   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10923   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10924   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10925   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10926   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10927     AS = LangAS::Default;
10928     return true;
10929   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10930     llvm_unreachable("Expected predefined allocator for the variables with the "
10931                      "static storage.");
10932   }
10933   return false;
10934 }
10935 
// Returns true if a 'requires unified_shared_memory' clause was processed
// (see processRequiresDirective).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
10939 
// RAII entry: in device compilations, save the runtime's ShouldMarkAsGlobal
// flag and clear it for the lifetime of this object (the destructor
// restores it). Host compilations are unaffected.
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}
10948 
// RAII exit: restore the ShouldMarkAsGlobal flag saved by the constructor
// (device compilations only, mirroring the constructor's guard).
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}
10953 
10954 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10955   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10956     return true;
10957 
10958   const auto *D = cast<FunctionDecl>(GD.getDecl());
10959   // Do not to emit function if it is marked as declare target as it was already
10960   // emitted.
10961   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10962     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10963       if (auto *F = dyn_cast_or_null<llvm::Function>(
10964               CGM.GetGlobalValue(CGM.getMangledName(GD))))
10965         return !F->isDeclaration();
10966       return false;
10967     }
10968     return true;
10969   }
10970 
10971   return !AlreadyEmittedTargetDecls.insert(D).second;
10972 }
10973 
// Create the ctor-like function that passes this TU's 'requires' flags to the
// offload runtime via __tgt_register_requires. Returns nullptr when no
// registration is needed (no offload targets, simd-only mode, device
// compilation, or no target entries in the TU).
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    // Emit __tgt_register_requires(flags) in the new function's body.
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
11015 
11016 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
11017                                     const OMPExecutableDirective &D,
11018                                     SourceLocation Loc,
11019                                     llvm::Function *OutlinedFn,
11020                                     ArrayRef<llvm::Value *> CapturedVars) {
11021   if (!CGF.HaveInsertPoint())
11022     return;
11023 
11024   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11025   CodeGenFunction::RunCleanupsScope Scope(CGF);
11026 
11027   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11028   llvm::Value *Args[] = {
11029       RTLoc,
11030       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11031       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
11032   llvm::SmallVector<llvm::Value *, 16> RealArgs;
11033   RealArgs.append(std::begin(Args), std::end(Args));
11034   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11035 
11036   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11037       CGM.getModule(), OMPRTL___kmpc_fork_teams);
11038   CGF.EmitRuntimeCall(RTLFn, RealArgs);
11039 }
11040 
// Emit __kmpc_push_num_teams to communicate the num_teams/thread_limit clause
// values to the runtime; a null clause expression is encoded as 0 (meaning
// "runtime default").
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  // Evaluate the clause expressions (when present) and truncate/extend to the
  // runtime's i32 parameter type.
  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}
11069 
// Emit the bracketing runtime calls for an 'omp target data' region:
// __tgt_target_data_begin_mapper before the body and
// __tgt_target_data_end_mapper after it, honoring the if and device clauses.
// When device pointers are privatized the body is duplicated between the
// then/else branches; otherwise it is emitted once between the two calls.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
    //
    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    // The arrays were filled in by BeginThenGen; Info must carry them here.
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info,
                                 {/*ForEndCall=*/true});

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Open the data environment (conditionally, if there is an if clause).
  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy ThenRCG(BeginThenGen);
    ThenRCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Close the data environment (conditionally, mirroring the opening).
  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
11223 
11224 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
11225     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11226     const Expr *Device) {
11227   if (!CGF.HaveInsertPoint())
11228     return;
11229 
11230   assert((isa<OMPTargetEnterDataDirective>(D) ||
11231           isa<OMPTargetExitDataDirective>(D) ||
11232           isa<OMPTargetUpdateDirective>(D)) &&
11233          "Expecting either target enter, exit data, or update directives.");
11234 
11235   CodeGenFunction::OMPTargetDataInfo InputInfo;
11236   llvm::Value *MapTypesArray = nullptr;
11237   llvm::Value *MapNamesArray = nullptr;
11238   // Generate the code for the opening of the data environment.
11239   auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
11240                     &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
11241     // Emit device ID if any.
11242     llvm::Value *DeviceID = nullptr;
11243     if (Device) {
11244       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11245                                            CGF.Int64Ty, /*isSigned=*/true);
11246     } else {
11247       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11248     }
11249 
11250     // Emit the number of elements in the offloading arrays.
11251     llvm::Constant *PointerNum =
11252         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
11253 
11254     // Source location for the ident struct
11255     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11256 
11257     llvm::Value *OffloadingArgs[] = {RTLoc,
11258                                      DeviceID,
11259                                      PointerNum,
11260                                      InputInfo.BasePointersArray.getPointer(),
11261                                      InputInfo.PointersArray.getPointer(),
11262                                      InputInfo.SizesArray.getPointer(),
11263                                      MapTypesArray,
11264                                      MapNamesArray,
11265                                      InputInfo.MappersArray.getPointer()};
11266 
11267     // Select the right runtime function call for each standalone
11268     // directive.
11269     const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
11270     RuntimeFunction RTLFn;
11271     switch (D.getDirectiveKind()) {
11272     case OMPD_target_enter_data:
11273       RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
11274                         : OMPRTL___tgt_target_data_begin_mapper;
11275       break;
11276     case OMPD_target_exit_data:
11277       RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
11278                         : OMPRTL___tgt_target_data_end_mapper;
11279       break;
11280     case OMPD_target_update:
11281       RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
11282                         : OMPRTL___tgt_target_data_update_mapper;
11283       break;
11284     case OMPD_parallel:
11285     case OMPD_for:
11286     case OMPD_parallel_for:
11287     case OMPD_parallel_master:
11288     case OMPD_parallel_sections:
11289     case OMPD_for_simd:
11290     case OMPD_parallel_for_simd:
11291     case OMPD_cancel:
11292     case OMPD_cancellation_point:
11293     case OMPD_ordered:
11294     case OMPD_threadprivate:
11295     case OMPD_allocate:
11296     case OMPD_task:
11297     case OMPD_simd:
11298     case OMPD_tile:
11299     case OMPD_unroll:
11300     case OMPD_sections:
11301     case OMPD_section:
11302     case OMPD_single:
11303     case OMPD_master:
11304     case OMPD_critical:
11305     case OMPD_taskyield:
11306     case OMPD_barrier:
11307     case OMPD_taskwait:
11308     case OMPD_taskgroup:
11309     case OMPD_atomic:
11310     case OMPD_flush:
11311     case OMPD_depobj:
11312     case OMPD_scan:
11313     case OMPD_teams:
11314     case OMPD_target_data:
11315     case OMPD_distribute:
11316     case OMPD_distribute_simd:
11317     case OMPD_distribute_parallel_for:
11318     case OMPD_distribute_parallel_for_simd:
11319     case OMPD_teams_distribute:
11320     case OMPD_teams_distribute_simd:
11321     case OMPD_teams_distribute_parallel_for:
11322     case OMPD_teams_distribute_parallel_for_simd:
11323     case OMPD_declare_simd:
11324     case OMPD_declare_variant:
11325     case OMPD_begin_declare_variant:
11326     case OMPD_end_declare_variant:
11327     case OMPD_declare_target:
11328     case OMPD_end_declare_target:
11329     case OMPD_declare_reduction:
11330     case OMPD_declare_mapper:
11331     case OMPD_taskloop:
11332     case OMPD_taskloop_simd:
11333     case OMPD_master_taskloop:
11334     case OMPD_master_taskloop_simd:
11335     case OMPD_parallel_master_taskloop:
11336     case OMPD_parallel_master_taskloop_simd:
11337     case OMPD_target:
11338     case OMPD_target_simd:
11339     case OMPD_target_teams_distribute:
11340     case OMPD_target_teams_distribute_simd:
11341     case OMPD_target_teams_distribute_parallel_for:
11342     case OMPD_target_teams_distribute_parallel_for_simd:
11343     case OMPD_target_teams:
11344     case OMPD_target_parallel:
11345     case OMPD_target_parallel_for:
11346     case OMPD_target_parallel_for_simd:
11347     case OMPD_requires:
11348     case OMPD_metadirective:
11349     case OMPD_unknown:
11350     default:
11351       llvm_unreachable("Unexpected standalone target data directive.");
11352       break;
11353     }
11354     CGF.EmitRuntimeCall(
11355         OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
11356         OffloadingArgs);
11357   };
11358 
11359   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
11360                           &MapNamesArray](CodeGenFunction &CGF,
11361                                           PrePostActionTy &) {
11362     // Fill up the arrays with all the mapped variables.
11363     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11364 
11365     // Get map clause information.
11366     MappableExprsHandler MEHandler(D, CGF);
11367     MEHandler.generateAllInfo(CombinedInfo);
11368 
11369     TargetDataInfo Info;
11370     // Fill up the arrays and create the arguments.
11371     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11372                          /*IsNonContiguous=*/true);
11373     bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
11374                              D.hasClausesOfKind<OMPNowaitClause>();
11375     emitOffloadingArraysArgument(
11376         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
11377         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
11378         {/*ForEndCall=*/false});
11379     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
11380     InputInfo.BasePointersArray =
11381         Address(Info.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
11382     InputInfo.PointersArray =
11383         Address(Info.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
11384     InputInfo.SizesArray =
11385         Address(Info.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
11386     InputInfo.MappersArray =
11387         Address(Info.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
11388     MapTypesArray = Info.MapTypesArray;
11389     MapNamesArray = Info.MapNamesArray;
11390     if (RequiresOuterTask)
11391       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
11392     else
11393       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
11394   };
11395 
11396   if (IfCond) {
11397     emitIfClause(CGF, IfCond, TargetThenGen,
11398                  [](CodeGenFunction &CGF, PrePostActionTy &) {});
11399   } else {
11400     RegionCodeGenTy ThenRCG(TargetThenGen);
11401     ThenRCG(CGF);
11402   }
11403 }
11404 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Classification of the parameter; defaults to Vector and is overridden
    /// when a uniform/linear clause names the parameter.
    ParamKindTy Kind = Vector;
    /// For Linear: the (constant) linear step. For LinearWithVarStride: the
    /// position of the parameter that carries the stride.
    llvm::APSInt StrideOrArg;
    /// Alignment from an 'aligned' clause; only mangled when non-zero.
    llvm::APSInt Alignment;
  };
} // namespace
11415 
11416 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11417                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11418   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11419   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11420   // of that clause. The VLEN value must be power of 2.
11421   // In other case the notion of the function`s "characteristic data type" (CDT)
11422   // is used to compute the vector length.
11423   // CDT is defined in the following order:
11424   //   a) For non-void function, the CDT is the return type.
11425   //   b) If the function has any non-uniform, non-linear parameters, then the
11426   //   CDT is the type of the first such parameter.
11427   //   c) If the CDT determined by a) or b) above is struct, union, or class
11428   //   type which is pass-by-value (except for the type that maps to the
11429   //   built-in complex data type), the characteristic data type is int.
11430   //   d) If none of the above three cases is applicable, the CDT is int.
11431   // The VLEN is then determined based on the CDT and the size of vector
11432   // register of that ISA for which current vector version is generated. The
11433   // VLEN is computed using the formula below:
11434   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11435   // where vector register size specified in section 3.2.1 Registers and the
11436   // Stack Frame of original AMD64 ABI document.
11437   QualType RetType = FD->getReturnType();
11438   if (RetType.isNull())
11439     return 0;
11440   ASTContext &C = FD->getASTContext();
11441   QualType CDT;
11442   if (!RetType.isNull() && !RetType->isVoidType()) {
11443     CDT = RetType;
11444   } else {
11445     unsigned Offset = 0;
11446     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11447       if (ParamAttrs[Offset].Kind == Vector)
11448         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11449       ++Offset;
11450     }
11451     if (CDT.isNull()) {
11452       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11453         if (ParamAttrs[I + Offset].Kind == Vector) {
11454           CDT = FD->getParamDecl(I)->getType();
11455           break;
11456         }
11457       }
11458     }
11459   }
11460   if (CDT.isNull())
11461     CDT = C.IntTy;
11462   CDT = CDT->getCanonicalTypeUnqualified();
11463   if (CDT->isRecordType() || CDT->isUnionType())
11464     CDT = C.IntTy;
11465   return C.getTypeSize(CDT);
11466 }
11467 
11468 static void
11469 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11470                            const llvm::APSInt &VLENVal,
11471                            ArrayRef<ParamAttrTy> ParamAttrs,
11472                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11473   struct ISADataTy {
11474     char ISA;
11475     unsigned VecRegSize;
11476   };
11477   ISADataTy ISAData[] = {
11478       {
11479           'b', 128
11480       }, // SSE
11481       {
11482           'c', 256
11483       }, // AVX
11484       {
11485           'd', 256
11486       }, // AVX2
11487       {
11488           'e', 512
11489       }, // AVX512
11490   };
11491   llvm::SmallVector<char, 2> Masked;
11492   switch (State) {
11493   case OMPDeclareSimdDeclAttr::BS_Undefined:
11494     Masked.push_back('N');
11495     Masked.push_back('M');
11496     break;
11497   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11498     Masked.push_back('N');
11499     break;
11500   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11501     Masked.push_back('M');
11502     break;
11503   }
11504   for (char Mask : Masked) {
11505     for (const ISADataTy &Data : ISAData) {
11506       SmallString<256> Buffer;
11507       llvm::raw_svector_ostream Out(Buffer);
11508       Out << "_ZGV" << Data.ISA << Mask;
11509       if (!VLENVal) {
11510         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11511         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11512         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11513       } else {
11514         Out << VLENVal;
11515       }
11516       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11517         switch (ParamAttr.Kind){
11518         case LinearWithVarStride:
11519           Out << 's' << ParamAttr.StrideOrArg;
11520           break;
11521         case Linear:
11522           Out << 'l';
11523           if (ParamAttr.StrideOrArg != 1)
11524             Out << ParamAttr.StrideOrArg;
11525           break;
11526         case Uniform:
11527           Out << 'u';
11528           break;
11529         case Vector:
11530           Out << 'v';
11531           break;
11532         }
11533         if (!!ParamAttr.Alignment)
11534           Out << 'a' << ParamAttr.Alignment;
11535       }
11536       Out << '_' << Fn->getName();
11537       Fn->addFnAttr(Out.str());
11538     }
11539   }
11540 }
11541 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined
// in the "Vector Function ABI specification for AArch64", available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11547 
11548 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11549 ///
11550 /// TODO: Need to implement the behavior for reference marked with a
11551 /// var or no linear modifiers (1.b in the section). For this, we
11552 /// need to extend ParamKindTy to support the linear modifiers.
11553 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11554   QT = QT.getCanonicalType();
11555 
11556   if (QT->isVoidType())
11557     return false;
11558 
11559   if (Kind == ParamKindTy::Uniform)
11560     return false;
11561 
11562   if (Kind == ParamKindTy::Linear)
11563     return false;
11564 
11565   // TODO: Handle linear references with modifiers
11566 
11567   if (Kind == ParamKindTy::LinearWithVarStride)
11568     return false;
11569 
11570   return true;
11571 }
11572 
11573 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11574 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11575   QT = QT.getCanonicalType();
11576   unsigned Size = C.getTypeSize(QT);
11577 
11578   // Only scalars and complex within 16 bytes wide set PVB to true.
11579   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11580     return false;
11581 
11582   if (QT->isFloatingType())
11583     return true;
11584 
11585   if (QT->isIntegerType())
11586     return true;
11587 
11588   if (QT->isPointerType())
11589     return true;
11590 
11591   // TODO: Add support for complex types (section 3.1.2, item 2).
11592 
11593   return false;
11594 }
11595 
11596 /// Computes the lane size (LS) of a return type or of an input parameter,
11597 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11598 /// TODO: Add support for references, section 3.2.1, item 1.
11599 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11600   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11601     QualType PTy = QT.getCanonicalType()->getPointeeType();
11602     if (getAArch64PBV(PTy, C))
11603       return C.getTypeSize(PTy);
11604   }
11605   if (getAArch64PBV(QT, C))
11606     return C.getTypeSize(QT);
11607 
11608   return C.getTypeSize(C.getUIntPtrType());
11609 }
11610 
11611 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11612 // signature of the scalar function, as defined in 3.2.2 of the
11613 // AAVFABI.
11614 static std::tuple<unsigned, unsigned, bool>
11615 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11616   QualType RetType = FD->getReturnType().getCanonicalType();
11617 
11618   ASTContext &C = FD->getASTContext();
11619 
11620   bool OutputBecomesInput = false;
11621 
11622   llvm::SmallVector<unsigned, 8> Sizes;
11623   if (!RetType->isVoidType()) {
11624     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11625     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11626       OutputBecomesInput = true;
11627   }
11628   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11629     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11630     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11631   }
11632 
11633   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11634   // The LS of a function parameter / return value can only be a power
11635   // of 2, starting from 8 bits, up to 128.
11636   assert(llvm::all_of(Sizes,
11637                       [](unsigned Size) {
11638                         return Size == 8 || Size == 16 || Size == 32 ||
11639                                Size == 64 || Size == 128;
11640                       }) &&
11641          "Invalid size");
11642 
11643   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11644                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11645                          OutputBecomesInput);
11646 }
11647 
11648 /// Mangle the parameter part of the vector function name according to
11649 /// their OpenMP classification. The mangling function is defined in
11650 /// section 3.5 of the AAVFABI.
11651 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11652   SmallString<256> Buffer;
11653   llvm::raw_svector_ostream Out(Buffer);
11654   for (const auto &ParamAttr : ParamAttrs) {
11655     switch (ParamAttr.Kind) {
11656     case LinearWithVarStride:
11657       Out << "ls" << ParamAttr.StrideOrArg;
11658       break;
11659     case Linear:
11660       Out << 'l';
11661       // Don't print the step value if it is not present or if it is
11662       // equal to 1.
11663       if (ParamAttr.StrideOrArg != 1)
11664         Out << ParamAttr.StrideOrArg;
11665       break;
11666     case Uniform:
11667       Out << 'u';
11668       break;
11669     case Vector:
11670       Out << 'v';
11671       break;
11672     }
11673 
11674     if (!!ParamAttr.Alignment)
11675       Out << 'a' << ParamAttr.Alignment;
11676   }
11677 
11678   return std::string(Out.str());
11679 }
11680 
11681 // Function used to add the attribute. The parameter `VLEN` is
11682 // templated to allow the use of "x" when targeting scalable functions
11683 // for SVE.
11684 template <typename T>
11685 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11686                                  char ISA, StringRef ParSeq,
11687                                  StringRef MangledName, bool OutputBecomesInput,
11688                                  llvm::Function *Fn) {
11689   SmallString<256> Buffer;
11690   llvm::raw_svector_ostream Out(Buffer);
11691   Out << Prefix << ISA << LMask << VLEN;
11692   if (OutputBecomesInput)
11693     Out << "v";
11694   Out << ParSeq << "_" << MangledName;
11695   Fn->addFnAttr(Out.str());
11696 }
11697 
11698 // Helper function to generate the Advanced SIMD names depending on
11699 // the value of the NDS when simdlen is not present.
11700 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11701                                       StringRef Prefix, char ISA,
11702                                       StringRef ParSeq, StringRef MangledName,
11703                                       bool OutputBecomesInput,
11704                                       llvm::Function *Fn) {
11705   switch (NDS) {
11706   case 8:
11707     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11708                          OutputBecomesInput, Fn);
11709     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11710                          OutputBecomesInput, Fn);
11711     break;
11712   case 16:
11713     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11714                          OutputBecomesInput, Fn);
11715     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11716                          OutputBecomesInput, Fn);
11717     break;
11718   case 32:
11719     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11720                          OutputBecomesInput, Fn);
11721     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11722                          OutputBecomesInput, Fn);
11723     break;
11724   case 64:
11725   case 128:
11726     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11727                          OutputBecomesInput, Fn);
11728     break;
11729   default:
11730     llvm_unreachable("Scalar type is too wide.");
11731   }
11732 }
11733 
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
/// \param UserVLEN value of the 'simdlen' clause, or 0 when absent.
/// \param MangledName scalar function name the vector names are derived from.
/// \param ISA 'n' for Advanced SIMD, 's' for SVE.
/// \param VecRegSize vector register width in bits.
/// Diagnoses (and bails out on) simdlen values that violate the ABI limits.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        // No clause: emit both the unmasked ('N') and masked ('M') variants.
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1: scalable vectors use "x" as VLEN.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`. The VLENs themselves are derived from the NDS.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
11842 
11843 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11844                                               llvm::Function *Fn) {
11845   ASTContext &C = CGM.getContext();
11846   FD = FD->getMostRecentDecl();
11847   while (FD) {
11848     // Map params to their positions in function decl.
11849     llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11850     if (isa<CXXMethodDecl>(FD))
11851       ParamPositions.try_emplace(FD, 0);
11852     unsigned ParamPos = ParamPositions.size();
11853     for (const ParmVarDecl *P : FD->parameters()) {
11854       ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11855       ++ParamPos;
11856     }
11857     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11858       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11859       // Mark uniform parameters.
11860       for (const Expr *E : Attr->uniforms()) {
11861         E = E->IgnoreParenImpCasts();
11862         unsigned Pos;
11863         if (isa<CXXThisExpr>(E)) {
11864           Pos = ParamPositions[FD];
11865         } else {
11866           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11867                                 ->getCanonicalDecl();
11868           auto It = ParamPositions.find(PVD);
11869           assert(It != ParamPositions.end() && "Function parameter not found");
11870           Pos = It->second;
11871         }
11872         ParamAttrs[Pos].Kind = Uniform;
11873       }
11874       // Get alignment info.
11875       auto *NI = Attr->alignments_begin();
11876       for (const Expr *E : Attr->aligneds()) {
11877         E = E->IgnoreParenImpCasts();
11878         unsigned Pos;
11879         QualType ParmTy;
11880         if (isa<CXXThisExpr>(E)) {
11881           Pos = ParamPositions[FD];
11882           ParmTy = E->getType();
11883         } else {
11884           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11885                                 ->getCanonicalDecl();
11886           auto It = ParamPositions.find(PVD);
11887           assert(It != ParamPositions.end() && "Function parameter not found");
11888           Pos = It->second;
11889           ParmTy = PVD->getType();
11890         }
11891         ParamAttrs[Pos].Alignment =
11892             (*NI)
11893                 ? (*NI)->EvaluateKnownConstInt(C)
11894                 : llvm::APSInt::getUnsigned(
11895                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11896                           .getQuantity());
11897         ++NI;
11898       }
11899       // Mark linear parameters.
11900       auto *SI = Attr->steps_begin();
11901       for (const Expr *E : Attr->linears()) {
11902         E = E->IgnoreParenImpCasts();
11903         unsigned Pos;
11904         // Rescaling factor needed to compute the linear parameter
11905         // value in the mangled name.
11906         unsigned PtrRescalingFactor = 1;
11907         if (isa<CXXThisExpr>(E)) {
11908           Pos = ParamPositions[FD];
11909         } else {
11910           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11911                                 ->getCanonicalDecl();
11912           auto It = ParamPositions.find(PVD);
11913           assert(It != ParamPositions.end() && "Function parameter not found");
11914           Pos = It->second;
11915           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11916             PtrRescalingFactor = CGM.getContext()
11917                                      .getTypeSizeInChars(P->getPointeeType())
11918                                      .getQuantity();
11919         }
11920         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11921         ParamAttr.Kind = Linear;
11922         // Assuming a stride of 1, for `linear` without modifiers.
11923         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11924         if (*SI) {
11925           Expr::EvalResult Result;
11926           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11927             if (const auto *DRE =
11928                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11929               if (const auto *StridePVD =
11930                       dyn_cast<ParmVarDecl>(DRE->getDecl())) {
11931                 ParamAttr.Kind = LinearWithVarStride;
11932                 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
11933                 assert(It != ParamPositions.end() &&
11934                        "Function parameter not found");
11935                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
11936               }
11937             }
11938           } else {
11939             ParamAttr.StrideOrArg = Result.Val.getInt();
11940           }
11941         }
11942         // If we are using a linear clause on a pointer, we need to
11943         // rescale the value of linear_step with the byte size of the
11944         // pointee type.
11945         if (Linear == ParamAttr.Kind)
11946           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11947         ++SI;
11948       }
11949       llvm::APSInt VLENVal;
11950       SourceLocation ExprLoc;
11951       const Expr *VLENExpr = Attr->getSimdlen();
11952       if (VLENExpr) {
11953         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11954         ExprLoc = VLENExpr->getExprLoc();
11955       }
11956       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11957       if (CGM.getTriple().isX86()) {
11958         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11959       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11960         unsigned VLEN = VLENVal.getExtValue();
11961         StringRef MangledName = Fn->getName();
11962         if (CGM.getTarget().hasFeature("sve"))
11963           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11964                                          MangledName, 's', 128, Fn, ExprLoc);
11965         if (CGM.getTarget().hasFeature("neon"))
11966           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11967                                          MangledName, 'n', 128, Fn, ExprLoc);
11968       }
11969     }
11970     FD = FD->getPreviousDecl();
11971   }
11972 }
11973 
11974 namespace {
11975 /// Cleanup action for doacross support.
11976 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11977 public:
11978   static const int DoacrossFinArgs = 2;
11979 
11980 private:
11981   llvm::FunctionCallee RTLFn;
11982   llvm::Value *Args[DoacrossFinArgs];
11983 
11984 public:
11985   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11986                     ArrayRef<llvm::Value *> CallArgs)
11987       : RTLFn(RTLFn) {
11988     assert(CallArgs.size() == DoacrossFinArgs);
11989     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11990   }
11991   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11992     if (!CGF.HaveInsertPoint())
11993       return;
11994     CGF.EmitRuntimeCall(RTLFn, Args);
11995   }
11996 };
11997 } // namespace
11998 
/// Emit initialization for doacross loop nesting support: builds an array of
/// per-dimension bounds descriptors (struct kmp_dim) and calls
/// __kmpc_doacross_init; a cleanup invoking __kmpc_doacross_fini is pushed on
/// the EH stack so finalization runs on both normal and exceptional exits.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // The kmp_dim record type is built lazily and cached in KmpDimTy for reuse
  // across all doacross loops in the module.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // One kmp_dim per loop dimension; zero-init so the lower bound stays 0.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    // The iteration count expression is widened/converted to kmp_int64.
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Register the matching finalization call to be emitted on region exit.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
12069 
12070 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12071                                           const OMPDependClause *C) {
12072   QualType Int64Ty =
12073       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
12074   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
12075   QualType ArrayTy = CGM.getContext().getConstantArrayType(
12076       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
12077   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
12078   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
12079     const Expr *CounterVal = C->getLoopData(I);
12080     assert(CounterVal);
12081     llvm::Value *CntVal = CGF.EmitScalarConversion(
12082         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
12083         CounterVal->getExprLoc());
12084     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
12085                           /*Volatile=*/false, Int64Ty);
12086   }
12087   llvm::Value *Args[] = {
12088       emitUpdateLocation(CGF, C->getBeginLoc()),
12089       getThreadID(CGF, C->getBeginLoc()),
12090       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
12091   llvm::FunctionCallee RTLFn;
12092   if (C->getDependencyKind() == OMPC_DEPEND_source) {
12093     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12094                                                   OMPRTL___kmpc_doacross_post);
12095   } else {
12096     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
12097     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12098                                                   OMPRTL___kmpc_doacross_wait);
12099   }
12100   CGF.EmitRuntimeCall(RTLFn, Args);
12101 }
12102 
12103 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12104                                llvm::FunctionCallee Callee,
12105                                ArrayRef<llvm::Value *> Args) const {
12106   assert(Loc.isValid() && "Outlined function call location must be valid.");
12107   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
12108 
12109   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12110     if (Fn->doesNotThrow()) {
12111       CGF.EmitNounwindRuntimeCall(Fn, Args);
12112       return;
12113     }
12114   }
12115   CGF.EmitRuntimeCall(Callee, Args);
12116 }
12117 
/// Emit a call to an outlined OpenMP function. The base implementation simply
/// forwards to emitCall; subclasses may override to adjust argument passing
/// for their target (not visible from this file).
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
12123 
12124 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12125   if (const auto *FD = dyn_cast<FunctionDecl>(D))
12126     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12127       HasEmittedDeclareTargetRegion = true;
12128 }
12129 
/// Return the address of the native parameter. The base implementation
/// ignores \p TargetParam and just returns the local address of
/// \p NativeParam; presumably target-specific subclasses translate between
/// the two (TODO confirm — overrides are not visible from this file).
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
12135 
12136 /// Return allocator value from expression, or return a null allocator (default
12137 /// when no allocator specified).
12138 static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
12139                                     const Expr *Allocator) {
12140   llvm::Value *AllocVal;
12141   if (Allocator) {
12142     AllocVal = CGF.EmitScalarExpr(Allocator);
12143     // According to the standard, the original allocator type is a enum
12144     // (integer). Convert to pointer type, if required.
12145     AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
12146                                         CGF.getContext().VoidPtrTy,
12147                                         Allocator->getExprLoc());
12148   } else {
12149     // If no allocator specified, it defaults to the null allocator.
12150     AllocVal = llvm::Constant::getNullValue(
12151         CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
12152   }
12153   return AllocVal;
12154 }
12155 
12156 /// Given the allocate directive list item type and align clause value,
12157 /// return appropriate alignment.
12158 static llvm::Value *getAlignmentValue(CodeGenFunction &CGF, QualType ListItemTy,
12159                                       const Expr *Alignment) {
12160   if (!Alignment)
12161     return nullptr;
12162 
12163   unsigned UserAlign =
12164       Alignment->EvaluateKnownConstInt(CGF.getContext()).getExtValue();
12165   CharUnits NaturalAlign = CGF.CGM.getNaturalTypeAlignment(ListItemTy);
12166 
12167   // OpenMP5.1 pg 185 lines 7-10
12168   //   Each item in the align modifier list must be aligned to the maximum
12169   //   of the specified alignment and the type's natural alignment.
12170   //
12171   // If no alignment specified then use the natural alignment.
12172   return llvm::ConstantInt::get(
12173       CGF.CGM.SizeTy,
12174       std::max<unsigned>(UserAlign, NaturalAlign.getQuantity()));
12175 }
12176 
/// Return the address to use for local variable \p VD, taking untied-task
/// remapping and the OMPAllocateDeclAttr (the 'allocate' directive/clause)
/// into account. For allocatable decls this emits a __kmpc_alloc /
/// __kmpc_aligned_alloc call and pushes a __kmpc_free cleanup; otherwise
/// it returns the untied-task address (or an invalid Address when no
/// remapping applies, letting the caller fall back to normal allocation).
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  // If the current function is an untied task body, look up the variable's
  // remapped (persistent) addresses.
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // VLA: the size is a runtime value, so round it up to the alignment
      // with IR arithmetic.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      // Constant-size type: round up at compile time.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    llvm::Value *Alignment = getAlignmentValue(
        CGF, VD->getType().getNonReferenceType(), AA->getAlignment());
    // Argument order differs: __kmpc_aligned_alloc(gtid, align, size, al)
    // vs __kmpc_alloc(gtid, size, al); the alignment arg is only pushed
    // when an align clause was present.
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // For untied tasks, persist the allocated pointer in the task-private
    // slot so it survives task switches.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        // __kmpc_free(gtid, ptr, allocator) — the allocator must match the
        // one used for the allocation.
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid()
            ? UntiedRealAddr
            : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
12277 
12278 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12279                                              const VarDecl *VD) const {
12280   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12281   if (It == FunctionToUntiedTaskStackMap.end())
12282     return false;
12283   return UntiedLocalVarsStack[It->second].count(VD) > 0;
12284 }
12285 
12286 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12287     CodeGenModule &CGM, const OMPLoopDirective &S)
12288     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12289   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12290   if (!NeedToPush)
12291     return;
12292   NontemporalDeclsSet &DS =
12293       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12294   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12295     for (const Stmt *Ref : C->private_refs()) {
12296       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12297       const ValueDecl *VD;
12298       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12299         VD = DRE->getDecl();
12300       } else {
12301         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12302         assert((ME->isImplicitCXXThis() ||
12303                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12304                "Expected member of current class.");
12305         VD = ME->getMemberDecl();
12306       }
12307       DS.insert(VD);
12308     }
12309   }
12310 }
12311 
12312 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12313   if (!NeedToPush)
12314     return;
12315   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12316 }
12317 
12318 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12319     CodeGenFunction &CGF,
12320     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12321                           std::pair<Address, Address>> &LocalVars)
12322     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12323   if (!NeedToPush)
12324     return;
12325   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12326       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12327   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12328 }
12329 
12330 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12331   if (!NeedToPush)
12332     return;
12333   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12334 }
12335 
12336 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12337   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12338 
12339   return llvm::any_of(
12340       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12341       [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
12342 }
12343 
12344 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12345     const OMPExecutableDirective &S,
12346     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12347     const {
12348   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12349   // Vars in target/task regions must be excluded completely.
12350   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12351       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12352     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12353     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12354     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12355     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12356       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12357         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12358     }
12359   }
12360   // Exclude vars in private clauses.
12361   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12362     for (const Expr *Ref : C->varlists()) {
12363       if (!Ref->getType()->isScalarType())
12364         continue;
12365       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12366       if (!DRE)
12367         continue;
12368       NeedToCheckForLPCs.insert(DRE->getDecl());
12369     }
12370   }
12371   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12372     for (const Expr *Ref : C->varlists()) {
12373       if (!Ref->getType()->isScalarType())
12374         continue;
12375       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12376       if (!DRE)
12377         continue;
12378       NeedToCheckForLPCs.insert(DRE->getDecl());
12379     }
12380   }
12381   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12382     for (const Expr *Ref : C->varlists()) {
12383       if (!Ref->getType()->isScalarType())
12384         continue;
12385       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12386       if (!DRE)
12387         continue;
12388       NeedToCheckForLPCs.insert(DRE->getDecl());
12389     }
12390   }
12391   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12392     for (const Expr *Ref : C->varlists()) {
12393       if (!Ref->getType()->isScalarType())
12394         continue;
12395       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12396       if (!DRE)
12397         continue;
12398       NeedToCheckForLPCs.insert(DRE->getDecl());
12399     }
12400   }
12401   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12402     for (const Expr *Ref : C->varlists()) {
12403       if (!Ref->getType()->isScalarType())
12404         continue;
12405       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12406       if (!DRE)
12407         continue;
12408       NeedToCheckForLPCs.insert(DRE->getDecl());
12409     }
12410   }
12411   for (const Decl *VD : NeedToCheckForLPCs) {
12412     for (const LastprivateConditionalData &Data :
12413          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12414       if (Data.DeclToUniqueName.count(VD) > 0) {
12415         if (!Data.Disabled)
12416           NeedToAddForLPCsAsDisabled.insert(VD);
12417         break;
12418       }
12419     }
12420   }
12421 }
12422 
/// RAII constructor for a directive that may carry
/// 'lastprivate(conditional: ...)' clauses. When OpenMP >= 5.0 and at least
/// one such clause is present, pushes a new LastprivateConditionalData entry
/// mapping each listed decl to a unique global name (used later as the name
/// of the tracking globals); otherwise pushes nothing. The matching pop
/// happens in the destructor, keyed off Action.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    // Only the 'conditional:' modifier participates; plain lastprivate
    // clauses are handled elsewhere.
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  // Record the loop iteration variable and owning function for later
  // comparison against inner regions.
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
12454 
/// RAII constructor used for inner regions (see disable()): if any decls
/// referenced by \p S are tracked by an enclosing lastprivate-conditional
/// region, pushes a 'Disabled' stack entry listing them so the inner region
/// skips the conditional-update analysis for those decls.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    // The unique name is irrelevant for disabled entries; only membership
    // in DeclToUniqueName matters.
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}
12473 
/// Named factory for the 'disable' form of the RAII: suppresses
/// lastprivate-conditional analysis for decls privatized/captured by \p S.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12479 
12480 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12481   if (CGM.getLangOpts().OpenMP < 50)
12482     return;
12483   if (Action == ActionToDo::DisableLastprivateConditional) {
12484     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12485            "Expected list of disabled private vars.");
12486     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12487   }
12488   if (Action == ActionToDo::PushAsLastprivateConditional) {
12489     assert(
12490         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12491         "Expected list of lastprivate conditional vars.");
12492     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12493   }
12494 }
12495 
/// Create (or reuse) the per-function wrapper temporary for the lastprivate
/// conditional variable \p VD. The wrapper record holds the private value and
/// a 'Fired' char flag that inner regions set when the variable is written.
/// Resets Fired to 0 and returns the address of the value field.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  // Per-function cache: VD -> (record type, value field, fired field, base).
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use in this function: build the wrapper record and allocate it.
    // NOTE(review): "lasprivate" looks like a typo for "lastprivate"; kept
    // as-is since it only names the implicit record type.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Cached: unpack the tuple stored on first use.
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Fired = 0;
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
12530 
namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  /// Active lastprivate-conditional regions, outermost first.
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  /// Set once a tracked reference is found; queried via getFoundData().
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  /// A DeclRefExpr matches if the innermost region tracking its decl is not
  /// disabled; a disabled region shadows outer ones (returns false).
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  /// Same as above for 'this->member' references.
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  /// Generic traversal: recurse only into glvalue children (only those can
  /// name the variable being stored to) and stop at the first match.
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  /// Results of a successful Visit: (expr, canonical decl, unique name,
  /// iteration-variable lvalue, owning function).
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace
12601 
/// Emit the conditional-lastprivate update: if the current iteration (IVLVal)
/// is >= the globally recorded last-updating iteration, copy the private
/// value (LVal) into the global "last value" variable named \p UniqueDeclName.
/// The compare-and-store runs inside a critical section (unless compiling in
/// simd-only mode, where no parallelism can occur).
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  Last->setAlignment(LVal.getAlignment().getAsAlign());
  LValue LastLVal = CGF.MakeAddrLValue(
      Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    // Pick signed vs unsigned compare based on the IV's source type.
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
12687 
12688 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12689                                                          const Expr *LHS) {
12690   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12691     return;
12692   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12693   if (!Checker.Visit(LHS))
12694     return;
12695   const Expr *FoundE;
12696   const Decl *FoundD;
12697   StringRef UniqueDeclName;
12698   LValue IVLVal;
12699   llvm::Function *FoundFn;
12700   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12701       Checker.getFoundData();
12702   if (FoundFn != CGF.CurFn) {
12703     // Special codegen for inner parallel regions.
12704     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12705     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12706     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12707            "Lastprivate conditional is not found in outer region.");
12708     QualType StructTy = std::get<0>(It->getSecond());
12709     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12710     LValue PrivLVal = CGF.EmitLValue(FoundE);
12711     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12712         PrivLVal.getAddress(CGF),
12713         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
12714         CGF.ConvertTypeForMem(StructTy));
12715     LValue BaseLVal =
12716         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12717     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12718     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12719                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12720                         FiredLVal, llvm::AtomicOrdering::Unordered,
12721                         /*IsVolatile=*/true, /*isInit=*/false);
12722     return;
12723   }
12724 
12725   // Private address of the lastprivate conditional in the current context.
12726   // priv_a
12727   LValue LVal = CGF.EmitLValue(FoundE);
12728   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12729                                    FoundE->getExprLoc());
12730 }
12731 
12732 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12733     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12734     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12735   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12736     return;
12737   auto Range = llvm::reverse(LastprivateConditionalStack);
12738   auto It = llvm::find_if(
12739       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12740   if (It == Range.end() || It->Fn != CGF.CurFn)
12741     return;
12742   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12743   assert(LPCI != LastprivateConditionalToTypes.end() &&
12744          "Lastprivates must be registered already.");
12745   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12746   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12747   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12748   for (const auto &Pair : It->DeclToUniqueName) {
12749     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12750     if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
12751       continue;
12752     auto I = LPCI->getSecond().find(Pair.first);
12753     assert(I != LPCI->getSecond().end() &&
12754            "Lastprivate must be rehistered already.");
12755     // bool Cmp = priv_a.Fired != 0;
12756     LValue BaseLVal = std::get<3>(I->getSecond());
12757     LValue FiredLVal =
12758         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12759     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12760     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12761     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12762     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12763     // if (Cmp) {
12764     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12765     CGF.EmitBlock(ThenBB);
12766     Address Addr = CGF.GetAddrOfLocalVar(VD);
12767     LValue LVal;
12768     if (VD->getType()->isReferenceType())
12769       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12770                                            AlignmentSource::Decl);
12771     else
12772       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12773                                 AlignmentSource::Decl);
12774     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12775                                      D.getBeginLoc());
12776     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12777     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12778     // }
12779   }
12780 }
12781 
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  // Lastprivate conditionals are an OpenMP 5.0 feature.
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  // Look up the unique global name registered for VD on the innermost entry.
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  // Copy the value stored in the unique global back into the private copy.
  LValue LPLVal = CGF.MakeAddrLValue(
      Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
      PrivLVal.getType().getNonReferenceType());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
12801 
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  // Parallel outlining requires the OpenMP runtime, not available in
  // SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12807 
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  // Teams outlining requires the OpenMP runtime, not available in SIMD-only
  // mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12813 
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Task outlining requires the OpenMP runtime, not available in SIMD-only
  // mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12821 
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  // Parallel calls require the OpenMP runtime, not available in SIMD-only
  // mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12830 
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  // Critical regions require the OpenMP runtime, not available in SIMD-only
  // mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12837 
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  // Master regions require the OpenMP runtime, not available in SIMD-only
  // mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12843 
void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  // Masked regions require the OpenMP runtime, not available in SIMD-only
  // mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12850 
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  // Taskyield requires the OpenMP runtime, not available in SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12855 
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  // Taskgroup regions require the OpenMP runtime, not available in SIMD-only
  // mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12861 
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  // Single regions require the OpenMP runtime, not available in SIMD-only
  // mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12869 
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  // Ordered regions require the OpenMP runtime, not available in SIMD-only
  // mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12876 
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  // Barriers require the OpenMP runtime, not available in SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12884 
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  // Dynamic loop scheduling requires the OpenMP runtime, not available in
  // SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12891 
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  // Static loop scheduling requires the OpenMP runtime, not available in
  // SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12897 
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  // Distribute scheduling requires the OpenMP runtime, not available in
  // SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12903 
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  // Ordered iteration handling requires the OpenMP runtime, not available in
  // SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12910 
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  // Static loop finalization requires the OpenMP runtime, not available in
  // SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12916 
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  // Dynamic chunk retrieval requires the OpenMP runtime, not available in
  // SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12924 
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  // num_threads requires the OpenMP runtime, not available in SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12930 
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  // proc_bind requires the OpenMP runtime, not available in SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12936 
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  // Threadprivate storage requires the OpenMP runtime, not available in
  // SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12943 
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  // Threadprivate definitions require the OpenMP runtime, not available in
  // SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12949 
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  // Artificial threadprivate storage requires the OpenMP runtime, not
  // available in SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12954 
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  // flush requires the OpenMP runtime, not available in SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12961 
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  // Task calls require the OpenMP runtime, not available in SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12970 
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  // Taskloop calls require the OpenMP runtime, not available in SIMD-only
  // mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12977 
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  // Only simple reductions (no runtime library calls) can be handled here;
  // those are delegated to the base-class implementation.
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
12986 
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  // Task reductions require the OpenMP runtime, not available in SIMD-only
  // mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12992 
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  // Task reductions require the OpenMP runtime, not available in SIMD-only
  // mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
12998 
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  // Task reductions require the OpenMP runtime, not available in SIMD-only
  // mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
13005 
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  // Task reductions require the OpenMP runtime, not available in SIMD-only
  // mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
13012 
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  // taskwait requires the OpenMP runtime, not available in SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
13018 
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  // Cancellation points require the OpenMP runtime, not available in
  // SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
13024 
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  // cancel requires the OpenMP runtime, not available in SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
13030 
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Target outlining requires the OpenMP runtime, not available in SIMD-only
  // mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
13037 
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  // Target calls require the OpenMP runtime, not available in SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
13047 
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  // Target function handling requires the OpenMP runtime, not available in
  // SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
13051 
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  // Target global-variable handling requires the OpenMP runtime, not
  // available in SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
13055 
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  // Nothing to do in SIMD-only mode; report the global as not handled here.
  return false;
}
13059 
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  // Teams calls require the OpenMP runtime, not available in SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
13067 
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  // num_teams/thread_limit require the OpenMP runtime, not available in
  // SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
13074 
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  // Target data mapping requires the OpenMP runtime, not available in
  // SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
13080 
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  // Stand-alone target data directives require the OpenMP runtime, not
  // available in SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
13086 
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  // Doacross loops require the OpenMP runtime, not available in SIMD-only
  // mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
13092 
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  // Doacross ordering requires the OpenMP runtime, not available in SIMD-only
  // mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
13097 
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  // Parameter translation requires the OpenMP runtime, not available in
  // SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
13103 
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  // Parameter address mapping requires the OpenMP runtime, not available in
  // SIMD-only mode.
  llvm_unreachable("Not supported in SIMD-only mode");
}
13110