1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/APValue.h"
20 #include "clang/AST/Attr.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/AST/OpenMPClause.h"
23 #include "clang/AST/StmtOpenMP.h"
24 #include "clang/AST/StmtVisitor.h"
25 #include "clang/Basic/BitmaskEnum.h"
26 #include "clang/Basic/FileManager.h"
27 #include "clang/Basic/OpenMPKinds.h"
28 #include "clang/Basic/SourceManager.h"
29 #include "clang/CodeGen/ConstantInitBuilder.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/SetOperations.h"
32 #include "llvm/ADT/SmallBitVector.h"
33 #include "llvm/ADT/StringExtras.h"
34 #include "llvm/Bitcode/BitcodeReader.h"
35 #include "llvm/IR/Constants.h"
36 #include "llvm/IR/DerivedTypes.h"
37 #include "llvm/IR/GlobalValue.h"
38 #include "llvm/IR/InstrTypes.h"
39 #include "llvm/IR/Value.h"
40 #include "llvm/Support/AtomicOrdering.h"
41 #include "llvm/Support/Format.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include <cassert>
44 #include <numeric>
45 
46 using namespace clang;
47 using namespace CodeGen;
48 using namespace llvm::omp;
49 
50 namespace {
51 /// Base class for handling code generation inside OpenMP regions.
52 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
53 public:
54   /// Kinds of OpenMP regions used in codegen.
55   enum CGOpenMPRegionKind {
56     /// Region with outlined function for standalone 'parallel'
57     /// directive.
58     ParallelOutlinedRegion,
59     /// Region with outlined function for standalone 'task' directive.
60     TaskOutlinedRegion,
61     /// Region for constructs that do not require function outlining,
62     /// like 'for', 'sections', 'atomic' etc. directives.
63     InlinedRegion,
64     /// Region with outlined function for standalone 'target' directive.
65     TargetRegion,
66   };
67 
68   CGOpenMPRegionInfo(const CapturedStmt &CS,
69                      const CGOpenMPRegionKind RegionKind,
70                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
71                      bool HasCancel)
72       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
73         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
74 
75   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
76                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
77                      bool HasCancel)
78       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
79         Kind(Kind), HasCancel(HasCancel) {}
80 
81   /// Get a variable or parameter for storing global thread id
82   /// inside OpenMP construct.
83   virtual const VarDecl *getThreadIDVariable() const = 0;
84 
85   /// Emit the captured statement body.
86   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
87 
88   /// Get an LValue for the current ThreadID variable.
89   /// \return LValue for thread id variable. This LValue always has type int32*.
90   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
91 
92   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
93 
94   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
95 
96   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
97 
98   bool hasCancel() const { return HasCancel; }
99 
100   static bool classof(const CGCapturedStmtInfo *Info) {
101     return Info->getKind() == CR_OpenMP;
102   }
103 
104   ~CGOpenMPRegionInfo() override = default;
105 
106 protected:
107   CGOpenMPRegionKind RegionKind;
108   RegionCodeGenTy CodeGen;
109   OpenMPDirectiveKind Kind;
110   bool HasCancel;
111 };
112 
113 /// API for captured statement code generation in OpenMP constructs.
114 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
115 public:
116   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
117                              const RegionCodeGenTy &CodeGen,
118                              OpenMPDirectiveKind Kind, bool HasCancel,
119                              StringRef HelperName)
120       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
121                            HasCancel),
122         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
123     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
124   }
125 
126   /// Get a variable or parameter for storing global thread id
127   /// inside OpenMP construct.
128   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
129 
130   /// Get the name of the capture helper.
131   StringRef getHelperName() const override { return HelperName; }
132 
133   static bool classof(const CGCapturedStmtInfo *Info) {
134     return CGOpenMPRegionInfo::classof(Info) &&
135            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
136                ParallelOutlinedRegion;
137   }
138 
139 private:
140   /// A variable or parameter storing global thread id for OpenMP
141   /// constructs.
142   const VarDecl *ThreadIDVar;
143   StringRef HelperName;
144 };
145 
146 /// API for captured statement code generation in OpenMP constructs.
147 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
148 public:
149   class UntiedTaskActionTy final : public PrePostActionTy {
150     bool Untied;
151     const VarDecl *PartIDVar;
152     const RegionCodeGenTy UntiedCodeGen;
153     llvm::SwitchInst *UntiedSwitch = nullptr;
154 
155   public:
156     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
157                        const RegionCodeGenTy &UntiedCodeGen)
158         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
159     void Enter(CodeGenFunction &CGF) override {
160       if (Untied) {
161         // Emit task switching point.
162         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
163             CGF.GetAddrOfLocalVar(PartIDVar),
164             PartIDVar->getType()->castAs<PointerType>());
165         llvm::Value *Res =
166             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
167         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
168         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
169         CGF.EmitBlock(DoneBB);
170         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
171         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
172         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
173                               CGF.Builder.GetInsertBlock());
174         emitUntiedSwitch(CGF);
175       }
176     }
177     void emitUntiedSwitch(CodeGenFunction &CGF) const {
178       if (Untied) {
179         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
180             CGF.GetAddrOfLocalVar(PartIDVar),
181             PartIDVar->getType()->castAs<PointerType>());
182         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
183                               PartIdLVal);
184         UntiedCodeGen(CGF);
185         CodeGenFunction::JumpDest CurPoint =
186             CGF.getJumpDestInCurrentScope(".untied.next.");
187         CGF.EmitBranch(CGF.ReturnBlock.getBlock());
188         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
189         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
190                               CGF.Builder.GetInsertBlock());
191         CGF.EmitBranchThroughCleanup(CurPoint);
192         CGF.EmitBlock(CurPoint.getBlock());
193       }
194     }
195     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
196   };
197   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
198                                  const VarDecl *ThreadIDVar,
199                                  const RegionCodeGenTy &CodeGen,
200                                  OpenMPDirectiveKind Kind, bool HasCancel,
201                                  const UntiedTaskActionTy &Action)
202       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
203         ThreadIDVar(ThreadIDVar), Action(Action) {
204     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
205   }
206 
207   /// Get a variable or parameter for storing global thread id
208   /// inside OpenMP construct.
209   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
210 
211   /// Get an LValue for the current ThreadID variable.
212   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
213 
214   /// Get the name of the capture helper.
215   StringRef getHelperName() const override { return ".omp_outlined."; }
216 
217   void emitUntiedSwitch(CodeGenFunction &CGF) override {
218     Action.emitUntiedSwitch(CGF);
219   }
220 
221   static bool classof(const CGCapturedStmtInfo *Info) {
222     return CGOpenMPRegionInfo::classof(Info) &&
223            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
224                TaskOutlinedRegion;
225   }
226 
227 private:
228   /// A variable or parameter storing global thread id for OpenMP
229   /// constructs.
230   const VarDecl *ThreadIDVar;
231   /// Action for emitting code for untied tasks.
232   const UntiedTaskActionTy &Action;
233 };
234 
235 /// API for inlined captured statement code generation in OpenMP
236 /// constructs.
237 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
238 public:
239   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
240                             const RegionCodeGenTy &CodeGen,
241                             OpenMPDirectiveKind Kind, bool HasCancel)
242       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
243         OldCSI(OldCSI),
244         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
245 
246   // Retrieve the value of the context parameter.
247   llvm::Value *getContextValue() const override {
248     if (OuterRegionInfo)
249       return OuterRegionInfo->getContextValue();
250     llvm_unreachable("No context value for inlined OpenMP region");
251   }
252 
253   void setContextValue(llvm::Value *V) override {
254     if (OuterRegionInfo) {
255       OuterRegionInfo->setContextValue(V);
256       return;
257     }
258     llvm_unreachable("No context value for inlined OpenMP region");
259   }
260 
261   /// Lookup the captured field decl for a variable.
262   const FieldDecl *lookup(const VarDecl *VD) const override {
263     if (OuterRegionInfo)
264       return OuterRegionInfo->lookup(VD);
265     // If there is no outer outlined region,no need to lookup in a list of
266     // captured variables, we can use the original one.
267     return nullptr;
268   }
269 
270   FieldDecl *getThisFieldDecl() const override {
271     if (OuterRegionInfo)
272       return OuterRegionInfo->getThisFieldDecl();
273     return nullptr;
274   }
275 
276   /// Get a variable or parameter for storing global thread id
277   /// inside OpenMP construct.
278   const VarDecl *getThreadIDVariable() const override {
279     if (OuterRegionInfo)
280       return OuterRegionInfo->getThreadIDVariable();
281     return nullptr;
282   }
283 
284   /// Get an LValue for the current ThreadID variable.
285   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
286     if (OuterRegionInfo)
287       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
288     llvm_unreachable("No LValue for inlined OpenMP construct");
289   }
290 
291   /// Get the name of the capture helper.
292   StringRef getHelperName() const override {
293     if (auto *OuterRegionInfo = getOldCSI())
294       return OuterRegionInfo->getHelperName();
295     llvm_unreachable("No helper name for inlined OpenMP construct");
296   }
297 
298   void emitUntiedSwitch(CodeGenFunction &CGF) override {
299     if (OuterRegionInfo)
300       OuterRegionInfo->emitUntiedSwitch(CGF);
301   }
302 
303   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
304 
305   static bool classof(const CGCapturedStmtInfo *Info) {
306     return CGOpenMPRegionInfo::classof(Info) &&
307            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
308   }
309 
310   ~CGOpenMPInlinedRegionInfo() override = default;
311 
312 private:
313   /// CodeGen info about outer OpenMP region.
314   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
315   CGOpenMPRegionInfo *OuterRegionInfo;
316 };
317 
318 /// API for captured statement code generation in OpenMP target
319 /// constructs. For this captures, implicit parameters are used instead of the
320 /// captured fields. The name of the target region has to be unique in a given
321 /// application so it is provided by the client, because only the client has
322 /// the information to generate that.
323 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
324 public:
325   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
326                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
327       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
328                            /*HasCancel=*/false),
329         HelperName(HelperName) {}
330 
331   /// This is unused for target regions because each starts executing
332   /// with a single thread.
333   const VarDecl *getThreadIDVariable() const override { return nullptr; }
334 
335   /// Get the name of the capture helper.
336   StringRef getHelperName() const override { return HelperName; }
337 
338   static bool classof(const CGCapturedStmtInfo *Info) {
339     return CGOpenMPRegionInfo::classof(Info) &&
340            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
341   }
342 
343 private:
344   StringRef HelperName;
345 };
346 
347 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
348   llvm_unreachable("No codegen for expressions");
349 }
350 /// API for generation of expressions captured in a innermost OpenMP
351 /// region.
352 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
353 public:
354   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
355       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
356                                   OMPD_unknown,
357                                   /*HasCancel=*/false),
358         PrivScope(CGF) {
359     // Make sure the globals captured in the provided statement are local by
360     // using the privatization logic. We assume the same variable is not
361     // captured more than once.
362     for (const auto &C : CS.captures()) {
363       if (!C.capturesVariable() && !C.capturesVariableByCopy())
364         continue;
365 
366       const VarDecl *VD = C.getCapturedVar();
367       if (VD->isLocalVarDeclOrParm())
368         continue;
369 
370       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
371                       /*RefersToEnclosingVariableOrCapture=*/false,
372                       VD->getType().getNonReferenceType(), VK_LValue,
373                       C.getLocation());
374       PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
375     }
376     (void)PrivScope.Privatize();
377   }
378 
379   /// Lookup the captured field decl for a variable.
380   const FieldDecl *lookup(const VarDecl *VD) const override {
381     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
382       return FD;
383     return nullptr;
384   }
385 
386   /// Emit the captured statement body.
387   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
388     llvm_unreachable("No body for expressions");
389   }
390 
391   /// Get a variable or parameter for storing global thread id
392   /// inside OpenMP construct.
393   const VarDecl *getThreadIDVariable() const override {
394     llvm_unreachable("No thread id for expressions");
395   }
396 
397   /// Get the name of the capture helper.
398   StringRef getHelperName() const override {
399     llvm_unreachable("No helper name for expressions");
400   }
401 
402   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
403 
404 private:
405   /// Private scope to capture global variables.
406   CodeGenFunction::OMPPrivateScope PrivScope;
407 };
408 
409 /// RAII for emitting code of OpenMP constructs.
410 class InlinedOpenMPRegionRAII {
411   CodeGenFunction &CGF;
412   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
413   FieldDecl *LambdaThisCaptureField = nullptr;
414   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
415   bool NoInheritance = false;
416 
417 public:
418   /// Constructs region for combined constructs.
419   /// \param CodeGen Code generation sequence for combined directives. Includes
420   /// a list of functions used for code generation of implicitly inlined
421   /// regions.
422   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
423                           OpenMPDirectiveKind Kind, bool HasCancel,
424                           bool NoInheritance = true)
425       : CGF(CGF), NoInheritance(NoInheritance) {
426     // Start emission for the construct.
427     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
428         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
429     if (NoInheritance) {
430       std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
431       LambdaThisCaptureField = CGF.LambdaThisCaptureField;
432       CGF.LambdaThisCaptureField = nullptr;
433       BlockInfo = CGF.BlockInfo;
434       CGF.BlockInfo = nullptr;
435     }
436   }
437 
438   ~InlinedOpenMPRegionRAII() {
439     // Restore original CapturedStmtInfo only if we're done with code emission.
440     auto *OldCSI =
441         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
442     delete CGF.CapturedStmtInfo;
443     CGF.CapturedStmtInfo = OldCSI;
444     if (NoInheritance) {
445       std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
446       CGF.LambdaThisCaptureField = LambdaThisCaptureField;
447       CGF.BlockInfo = BlockInfo;
448     }
449   }
450 };
451 
452 /// Values for bit flags used in the ident_t to describe the fields.
453 /// All enumeric elements are named and described in accordance with the code
454 /// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
455 enum OpenMPLocationFlags : unsigned {
456   /// Use trampoline for internal microtask.
457   OMP_IDENT_IMD = 0x01,
458   /// Use c-style ident structure.
459   OMP_IDENT_KMPC = 0x02,
460   /// Atomic reduction option for kmpc_reduce.
461   OMP_ATOMIC_REDUCE = 0x10,
462   /// Explicit 'barrier' directive.
463   OMP_IDENT_BARRIER_EXPL = 0x20,
464   /// Implicit barrier in code.
465   OMP_IDENT_BARRIER_IMPL = 0x40,
466   /// Implicit barrier in 'for' directive.
467   OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
468   /// Implicit barrier in 'sections' directive.
469   OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
470   /// Implicit barrier in 'single' directive.
471   OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
472   /// Call of __kmp_for_static_init for static loop.
473   OMP_IDENT_WORK_LOOP = 0x200,
474   /// Call of __kmp_for_static_init for sections.
475   OMP_IDENT_WORK_SECTIONS = 0x400,
476   /// Call of __kmp_for_static_init for distribute.
477   OMP_IDENT_WORK_DISTRIBUTE = 0x800,
478   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
479 };
480 
481 namespace {
482 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
483 /// Values for bit flags for marking which requires clauses have been used.
484 enum OpenMPOffloadingRequiresDirFlags : int64_t {
485   /// flag undefined.
486   OMP_REQ_UNDEFINED               = 0x000,
487   /// no requires clause present.
488   OMP_REQ_NONE                    = 0x001,
489   /// reverse_offload clause.
490   OMP_REQ_REVERSE_OFFLOAD         = 0x002,
491   /// unified_address clause.
492   OMP_REQ_UNIFIED_ADDRESS         = 0x004,
493   /// unified_shared_memory clause.
494   OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
495   /// dynamic_allocators clause.
496   OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
497   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
498 };
499 
500 enum OpenMPOffloadingReservedDeviceIDs {
501   /// Device ID if the device was not defined, runtime should get it
502   /// from environment variables in the spec.
503   OMP_DEVICEID_UNDEF = -1,
504 };
505 } // anonymous namespace
506 
507 /// Describes ident structure that describes a source location.
508 /// All descriptions are taken from
509 /// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
510 /// Original structure:
511 /// typedef struct ident {
512 ///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
513 ///                                  see above  */
514 ///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
515 ///                                  KMP_IDENT_KMPC identifies this union
516 ///                                  member  */
517 ///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
518 ///                                  see above */
519 ///#if USE_ITT_BUILD
520 ///                            /*  but currently used for storing
521 ///                                region-specific ITT */
522 ///                            /*  contextual information. */
523 ///#endif /* USE_ITT_BUILD */
524 ///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
525 ///                                 C++  */
526 ///    char const *psource;    /**< String describing the source location.
527 ///                            The string is composed of semi-colon separated
528 //                             fields which describe the source file,
529 ///                            the function and a pair of line numbers that
530 ///                            delimit the construct.
531 ///                             */
532 /// } ident_t;
533 enum IdentFieldIndex {
534   /// might be used in Fortran
535   IdentField_Reserved_1,
536   /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
537   IdentField_Flags,
538   /// Not really used in Fortran any more
539   IdentField_Reserved_2,
540   /// Source[4] in Fortran, do not use for C++
541   IdentField_Reserved_3,
542   /// String describing the source location. The string is composed of
543   /// semi-colon separated fields which describe the source file, the function
544   /// and a pair of line numbers that delimit the construct.
545   IdentField_PSource
546 };
547 
548 /// Schedule types for 'omp for' loops (these enumerators are taken from
549 /// the enum sched_type in kmp.h).
550 enum OpenMPSchedType {
551   /// Lower bound for default (unordered) versions.
552   OMP_sch_lower = 32,
553   OMP_sch_static_chunked = 33,
554   OMP_sch_static = 34,
555   OMP_sch_dynamic_chunked = 35,
556   OMP_sch_guided_chunked = 36,
557   OMP_sch_runtime = 37,
558   OMP_sch_auto = 38,
559   /// static with chunk adjustment (e.g., simd)
560   OMP_sch_static_balanced_chunked = 45,
561   /// Lower bound for 'ordered' versions.
562   OMP_ord_lower = 64,
563   OMP_ord_static_chunked = 65,
564   OMP_ord_static = 66,
565   OMP_ord_dynamic_chunked = 67,
566   OMP_ord_guided_chunked = 68,
567   OMP_ord_runtime = 69,
568   OMP_ord_auto = 70,
569   OMP_sch_default = OMP_sch_static,
570   /// dist_schedule types
571   OMP_dist_sch_static_chunked = 91,
572   OMP_dist_sch_static = 92,
573   /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
574   /// Set if the monotonic schedule modifier was present.
575   OMP_sch_modifier_monotonic = (1 << 29),
576   /// Set if the nonmonotonic schedule modifier was present.
577   OMP_sch_modifier_nonmonotonic = (1 << 30),
578 };
579 
580 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
581 /// region.
582 class CleanupTy final : public EHScopeStack::Cleanup {
583   PrePostActionTy *Action;
584 
585 public:
586   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
587   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
588     if (!CGF.HaveInsertPoint())
589       return;
590     Action->Exit(CGF);
591   }
592 };
593 
594 } // anonymous namespace
595 
596 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
597   CodeGenFunction::RunCleanupsScope Scope(CGF);
598   if (PrePostAction) {
599     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
600     Callback(CodeGen, CGF, *PrePostAction);
601   } else {
602     PrePostActionTy Action;
603     Callback(CodeGen, CGF, Action);
604   }
605 }
606 
607 /// Check if the combiner is a call to UDR combiner and if it is so return the
608 /// UDR decl used for reduction.
609 static const OMPDeclareReductionDecl *
610 getReductionInit(const Expr *ReductionOp) {
611   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
612     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
613       if (const auto *DRE =
614               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
615         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
616           return DRD;
617   return nullptr;
618 }
619 
620 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
621                                              const OMPDeclareReductionDecl *DRD,
622                                              const Expr *InitOp,
623                                              Address Private, Address Original,
624                                              QualType Ty) {
625   if (DRD->getInitializer()) {
626     std::pair<llvm::Function *, llvm::Function *> Reduction =
627         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
628     const auto *CE = cast<CallExpr>(InitOp);
629     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
630     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
631     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
632     const auto *LHSDRE =
633         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
634     const auto *RHSDRE =
635         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
636     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
637     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
638     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
639     (void)PrivateScope.Privatize();
640     RValue Func = RValue::get(Reduction.second);
641     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
642     CGF.EmitIgnoredExpr(InitOp);
643   } else {
644     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
645     std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
646     auto *GV = new llvm::GlobalVariable(
647         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
648         llvm::GlobalValue::PrivateLinkage, Init, Name);
649     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
650     RValue InitRVal;
651     switch (CGF.getEvaluationKind(Ty)) {
652     case TEK_Scalar:
653       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
654       break;
655     case TEK_Complex:
656       InitRVal =
657           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
658       break;
659     case TEK_Aggregate: {
660       OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
661       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
662       CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
663                            /*IsInitializer=*/false);
664       return;
665     }
666     }
667     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
668     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
669     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
670                          /*IsInitializer=*/false);
671   }
672 }
673 
674 /// Emit initialization of arrays of complex types.
675 /// \param DestAddr Address of the array.
676 /// \param Type Type of array.
677 /// \param Init Initial expression of array.
678 /// \param SrcAddr Address of the original array.
679 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
680                                  QualType Type, bool EmitDeclareReductionInit,
681                                  const Expr *Init,
682                                  const OMPDeclareReductionDecl *DRD,
683                                  Address SrcAddr = Address::invalid()) {
684   // Perform element-by-element initialization.
685   QualType ElementTy;
686 
687   // Drill down to the base element type on both arrays.
688   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
689   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
690   if (DRD)
691     SrcAddr =
692         CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
693 
694   llvm::Value *SrcBegin = nullptr;
695   if (DRD)
696     SrcBegin = SrcAddr.getPointer();
697   llvm::Value *DestBegin = DestAddr.getPointer();
698   // Cast from pointer to array type to pointer to single element.
699   llvm::Value *DestEnd =
700       CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
701   // The basic structure here is a while-do loop.
702   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
703   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
704   llvm::Value *IsEmpty =
705       CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
706   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
707 
708   // Enter the loop body, making that address the current address.
709   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
710   CGF.EmitBlock(BodyBB);
711 
712   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
713 
714   llvm::PHINode *SrcElementPHI = nullptr;
715   Address SrcElementCurrent = Address::invalid();
716   if (DRD) {
717     SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
718                                           "omp.arraycpy.srcElementPast");
719     SrcElementPHI->addIncoming(SrcBegin, EntryBB);
720     SrcElementCurrent =
721         Address(SrcElementPHI, SrcAddr.getElementType(),
722                 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
723   }
724   llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
725       DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
726   DestElementPHI->addIncoming(DestBegin, EntryBB);
727   Address DestElementCurrent =
728       Address(DestElementPHI, DestAddr.getElementType(),
729               DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
730 
731   // Emit copy.
732   {
733     CodeGenFunction::RunCleanupsScope InitScope(CGF);
734     if (EmitDeclareReductionInit) {
735       emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
736                                        SrcElementCurrent, ElementTy);
737     } else
738       CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
739                            /*IsInitializer=*/false);
740   }
741 
742   if (DRD) {
743     // Shift the address forward by one element.
744     llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
745         SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
746         "omp.arraycpy.dest.element");
747     SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
748   }
749 
750   // Shift the address forward by one element.
751   llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
752       DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
753       "omp.arraycpy.dest.element");
754   // Check whether we've reached the end.
755   llvm::Value *Done =
756       CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
757   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
758   DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
759 
760   // Done.
761   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
762 }
763 
764 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
765   return CGF.EmitOMPSharedLValue(E);
766 }
767 
768 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
769                                             const Expr *E) {
770   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
771     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
772   return LValue();
773 }
774 
775 void ReductionCodeGen::emitAggregateInitialization(
776     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
777     const OMPDeclareReductionDecl *DRD) {
778   // Emit VarDecl with copy init for arrays.
779   // Get the address of the original variable captured in current
780   // captured region.
781   const auto *PrivateVD =
782       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
783   bool EmitDeclareReductionInit =
784       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
785   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
786                        EmitDeclareReductionInit,
787                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
788                                                 : PrivateVD->getInit(),
789                        DRD, SharedAddr);
790 }
791 
792 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
793                                    ArrayRef<const Expr *> Origs,
794                                    ArrayRef<const Expr *> Privates,
795                                    ArrayRef<const Expr *> ReductionOps) {
796   ClausesData.reserve(Shareds.size());
797   SharedAddresses.reserve(Shareds.size());
798   Sizes.reserve(Shareds.size());
799   BaseDecls.reserve(Shareds.size());
800   const auto *IOrig = Origs.begin();
801   const auto *IPriv = Privates.begin();
802   const auto *IRed = ReductionOps.begin();
803   for (const Expr *Ref : Shareds) {
804     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
805     std::advance(IOrig, 1);
806     std::advance(IPriv, 1);
807     std::advance(IRed, 1);
808   }
809 }
810 
811 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
812   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
813          "Number of generated lvalues must be exactly N.");
814   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
815   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
816   SharedAddresses.emplace_back(First, Second);
817   if (ClausesData[N].Shared == ClausesData[N].Ref) {
818     OrigAddresses.emplace_back(First, Second);
819   } else {
820     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
821     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
822     OrigAddresses.emplace_back(First, Second);
823   }
824 }
825 
826 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
827   QualType PrivateType = getPrivateType(N);
828   bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
829   if (!PrivateType->isVariablyModifiedType()) {
830     Sizes.emplace_back(
831         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
832         nullptr);
833     return;
834   }
835   llvm::Value *Size;
836   llvm::Value *SizeInChars;
837   auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
838   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
839   if (AsArraySection) {
840     Size = CGF.Builder.CreatePtrDiff(ElemType,
841                                      OrigAddresses[N].second.getPointer(CGF),
842                                      OrigAddresses[N].first.getPointer(CGF));
843     Size = CGF.Builder.CreateNUWAdd(
844         Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
845     SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
846   } else {
847     SizeInChars =
848         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
849     Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
850   }
851   Sizes.emplace_back(SizeInChars, Size);
852   CodeGenFunction::OpaqueValueMapping OpaqueMap(
853       CGF,
854       cast<OpaqueValueExpr>(
855           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
856       RValue::get(Size));
857   CGF.EmitVariablyModifiedType(PrivateType);
858 }
859 
860 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
861                                          llvm::Value *Size) {
862   QualType PrivateType = getPrivateType(N);
863   if (!PrivateType->isVariablyModifiedType()) {
864     assert(!Size && !Sizes[N].second &&
865            "Size should be nullptr for non-variably modified reduction "
866            "items.");
867     return;
868   }
869   CodeGenFunction::OpaqueValueMapping OpaqueMap(
870       CGF,
871       cast<OpaqueValueExpr>(
872           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
873       RValue::get(Size));
874   CGF.EmitVariablyModifiedType(PrivateType);
875 }
876 
877 void ReductionCodeGen::emitInitialization(
878     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
879     llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
880   assert(SharedAddresses.size() > N && "No variable was generated");
881   const auto *PrivateVD =
882       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
883   const OMPDeclareReductionDecl *DRD =
884       getReductionInit(ClausesData[N].ReductionOp);
885   if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
886     if (DRD && DRD->getInitializer())
887       (void)DefaultInit(CGF);
888     emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
889   } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
890     (void)DefaultInit(CGF);
891     QualType SharedType = SharedAddresses[N].first.getType();
892     emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
893                                      PrivateAddr, SharedAddr, SharedType);
894   } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
895              !CGF.isTrivialInitializer(PrivateVD->getInit())) {
896     CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
897                          PrivateVD->getType().getQualifiers(),
898                          /*IsInitializer=*/false);
899   }
900 }
901 
902 bool ReductionCodeGen::needCleanups(unsigned N) {
903   QualType PrivateType = getPrivateType(N);
904   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
905   return DTorKind != QualType::DK_none;
906 }
907 
908 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
909                                     Address PrivateAddr) {
910   QualType PrivateType = getPrivateType(N);
911   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
912   if (needCleanups(N)) {
913     PrivateAddr = CGF.Builder.CreateElementBitCast(
914         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
915     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
916   }
917 }
918 
919 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
920                           LValue BaseLV) {
921   BaseTy = BaseTy.getNonReferenceType();
922   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
923          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
924     if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
925       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
926     } else {
927       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
928       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
929     }
930     BaseTy = BaseTy->getPointeeType();
931   }
932   return CGF.MakeAddrLValue(
933       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
934                                        CGF.ConvertTypeForMem(ElTy)),
935       BaseLV.getType(), BaseLV.getBaseInfo(),
936       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
937 }
938 
939 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
940                           Address OriginalBaseAddress, llvm::Value *Addr) {
941   Address Tmp = Address::invalid();
942   Address TopTmp = Address::invalid();
943   Address MostTopTmp = Address::invalid();
944   BaseTy = BaseTy.getNonReferenceType();
945   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
946          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
947     Tmp = CGF.CreateMemTemp(BaseTy);
948     if (TopTmp.isValid())
949       CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
950     else
951       MostTopTmp = Tmp;
952     TopTmp = Tmp;
953     BaseTy = BaseTy->getPointeeType();
954   }
955 
956   if (Tmp.isValid()) {
957     Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
958         Addr, Tmp.getElementType());
959     CGF.Builder.CreateStore(Addr, Tmp);
960     return MostTopTmp;
961   }
962 
963   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
964       Addr, OriginalBaseAddress.getType());
965   return OriginalBaseAddress.withPointer(Addr);
966 }
967 
968 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
969   const VarDecl *OrigVD = nullptr;
970   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
971     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
972     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
973       Base = TempOASE->getBase()->IgnoreParenImpCasts();
974     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
975       Base = TempASE->getBase()->IgnoreParenImpCasts();
976     DE = cast<DeclRefExpr>(Base);
977     OrigVD = cast<VarDecl>(DE->getDecl());
978   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
979     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
980     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
981       Base = TempASE->getBase()->IgnoreParenImpCasts();
982     DE = cast<DeclRefExpr>(Base);
983     OrigVD = cast<VarDecl>(DE->getDecl());
984   }
985   return OrigVD;
986 }
987 
988 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
989                                                Address PrivateAddr) {
990   const DeclRefExpr *DE;
991   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
992     BaseDecls.emplace_back(OrigVD);
993     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
994     LValue BaseLValue =
995         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
996                     OriginalBaseLValue);
997     Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
998     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
999         SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
1000         SharedAddr.getPointer());
1001     llvm::Value *PrivatePointer =
1002         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1003             PrivateAddr.getPointer(), SharedAddr.getType());
1004     llvm::Value *Ptr = CGF.Builder.CreateGEP(
1005         SharedAddr.getElementType(), PrivatePointer, Adjustment);
1006     return castToBase(CGF, OrigVD->getType(),
1007                       SharedAddresses[N].first.getType(),
1008                       OriginalBaseLValue.getAddress(CGF), Ptr);
1009   }
1010   BaseDecls.emplace_back(
1011       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1012   return PrivateAddr;
1013 }
1014 
1015 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1016   const OMPDeclareReductionDecl *DRD =
1017       getReductionInit(ClausesData[N].ReductionOp);
1018   return DRD && DRD->getInitializer();
1019 }
1020 
1021 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1022   return CGF.EmitLoadOfPointerLValue(
1023       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1024       getThreadIDVariable()->getType()->castAs<PointerType>());
1025 }
1026 
1027 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
1028   if (!CGF.HaveInsertPoint())
1029     return;
1030   // 1.2.2 OpenMP Language Terminology
1031   // Structured block - An executable statement with a single entry at the
1032   // top and a single exit at the bottom.
1033   // The point of exit cannot be a branch out of the structured block.
1034   // longjmp() and throw() must not violate the entry/exit criteria.
1035   CGF.EHStack.pushTerminate();
1036   if (S)
1037     CGF.incrementProfileCounter(S);
1038   CodeGen(CGF);
1039   CGF.EHStack.popTerminate();
1040 }
1041 
1042 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1043     CodeGenFunction &CGF) {
1044   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1045                             getThreadIDVariable()->getType(),
1046                             AlignmentSource::Decl);
1047 }
1048 
1049 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1050                                        QualType FieldTy) {
1051   auto *Field = FieldDecl::Create(
1052       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1053       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1054       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1055   Field->setAccess(AS_public);
1056   DC->addDecl(Field);
1057   return Field;
1058 }
1059 
1060 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1061                                  StringRef Separator)
1062     : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1063       OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
1064   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1065 
1066   // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
1067   OMPBuilder.initialize();
1068   loadOffloadInfoMetadata();
1069 }
1070 
1071 void CGOpenMPRuntime::clear() {
1072   InternalVars.clear();
1073   // Clean non-target variable declarations possibly used only in debug info.
1074   for (const auto &Data : EmittedNonTargetVariables) {
1075     if (!Data.getValue().pointsToAliveValue())
1076       continue;
1077     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1078     if (!GV)
1079       continue;
1080     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1081       continue;
1082     GV->eraseFromParent();
1083   }
1084 }
1085 
1086 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1087   SmallString<128> Buffer;
1088   llvm::raw_svector_ostream OS(Buffer);
1089   StringRef Sep = FirstSeparator;
1090   for (StringRef Part : Parts) {
1091     OS << Sep << Part;
1092     Sep = Separator;
1093   }
1094   return std::string(OS.str());
1095 }
1096 
1097 static llvm::Function *
1098 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
1099                           const Expr *CombinerInitializer, const VarDecl *In,
1100                           const VarDecl *Out, bool IsCombiner) {
1101   // void .omp_combiner.(Ty *in, Ty *out);
1102   ASTContext &C = CGM.getContext();
1103   QualType PtrTy = C.getPointerType(Ty).withRestrict();
1104   FunctionArgList Args;
1105   ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1106                                /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1107   ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1108                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1109   Args.push_back(&OmpOutParm);
1110   Args.push_back(&OmpInParm);
1111   const CGFunctionInfo &FnInfo =
1112       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1113   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1114   std::string Name = CGM.getOpenMPRuntime().getName(
1115       {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
1116   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
1117                                     Name, &CGM.getModule());
1118   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
1119   if (CGM.getLangOpts().Optimize) {
1120     Fn->removeFnAttr(llvm::Attribute::NoInline);
1121     Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1122     Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1123   }
1124   CodeGenFunction CGF(CGM);
1125   // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1126   // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1127   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
1128                     Out->getLocation());
1129   CodeGenFunction::OMPPrivateScope Scope(CGF);
1130   Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1131   Scope.addPrivate(
1132       In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1133               .getAddress(CGF));
1134   Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1135   Scope.addPrivate(
1136       Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1137                .getAddress(CGF));
1138   (void)Scope.Privatize();
1139   if (!IsCombiner && Out->hasInit() &&
1140       !CGF.isTrivialInitializer(Out->getInit())) {
1141     CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1142                          Out->getType().getQualifiers(),
1143                          /*IsInitializer=*/true);
1144   }
1145   if (CombinerInitializer)
1146     CGF.EmitIgnoredExpr(CombinerInitializer);
1147   Scope.ForceCleanup();
1148   CGF.FinishFunction();
1149   return Fn;
1150 }
1151 
1152 void CGOpenMPRuntime::emitUserDefinedReduction(
1153     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1154   if (UDRMap.count(D) > 0)
1155     return;
1156   llvm::Function *Combiner = emitCombinerOrInitializer(
1157       CGM, D->getType(), D->getCombiner(),
1158       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1159       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1160       /*IsCombiner=*/true);
1161   llvm::Function *Initializer = nullptr;
1162   if (const Expr *Init = D->getInitializer()) {
1163     Initializer = emitCombinerOrInitializer(
1164         CGM, D->getType(),
1165         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1166                                                                      : nullptr,
1167         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1168         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1169         /*IsCombiner=*/false);
1170   }
1171   UDRMap.try_emplace(D, Combiner, Initializer);
1172   if (CGF) {
1173     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1174     Decls.second.push_back(D);
1175   }
1176 }
1177 
1178 std::pair<llvm::Function *, llvm::Function *>
1179 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1180   auto I = UDRMap.find(D);
1181   if (I != UDRMap.end())
1182     return I->second;
1183   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1184   return UDRMap.lookup(D);
1185 }
1186 
1187 namespace {
1188 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1189 // Builder if one is present.
1190 struct PushAndPopStackRAII {
1191   PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1192                       bool HasCancel, llvm::omp::Directive Kind)
1193       : OMPBuilder(OMPBuilder) {
1194     if (!OMPBuilder)
1195       return;
1196 
1197     // The following callback is the crucial part of clangs cleanup process.
1198     //
1199     // NOTE:
1200     // Once the OpenMPIRBuilder is used to create parallel regions (and
1201     // similar), the cancellation destination (Dest below) is determined via
1202     // IP. That means if we have variables to finalize we split the block at IP,
1203     // use the new block (=BB) as destination to build a JumpDest (via
1204     // getJumpDestInCurrentScope(BB)) which then is fed to
1205     // EmitBranchThroughCleanup. Furthermore, there will not be the need
1206     // to push & pop an FinalizationInfo object.
1207     // The FiniCB will still be needed but at the point where the
1208     // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1209     auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1210       assert(IP.getBlock()->end() == IP.getPoint() &&
1211              "Clang CG should cause non-terminated block!");
1212       CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1213       CGF.Builder.restoreIP(IP);
1214       CodeGenFunction::JumpDest Dest =
1215           CGF.getOMPCancelDestination(OMPD_parallel);
1216       CGF.EmitBranchThroughCleanup(Dest);
1217     };
1218 
1219     // TODO: Remove this once we emit parallel regions through the
1220     //       OpenMPIRBuilder as it can do this setup internally.
1221     llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
1222     OMPBuilder->pushFinalizationCB(std::move(FI));
1223   }
1224   ~PushAndPopStackRAII() {
1225     if (OMPBuilder)
1226       OMPBuilder->popFinalizationCB();
1227   }
1228   llvm::OpenMPIRBuilder *OMPBuilder;
1229 };
1230 } // namespace
1231 
1232 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1233     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1234     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1235     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1236   assert(ThreadIDVar->getType()->isPointerType() &&
1237          "thread id variable must be of type kmp_int32 *");
1238   CodeGenFunction CGF(CGM, true);
1239   bool HasCancel = false;
1240   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1241     HasCancel = OPD->hasCancel();
1242   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1243     HasCancel = OPD->hasCancel();
1244   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1245     HasCancel = OPSD->hasCancel();
1246   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1247     HasCancel = OPFD->hasCancel();
1248   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1249     HasCancel = OPFD->hasCancel();
1250   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1251     HasCancel = OPFD->hasCancel();
1252   else if (const auto *OPFD =
1253                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1254     HasCancel = OPFD->hasCancel();
1255   else if (const auto *OPFD =
1256                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1257     HasCancel = OPFD->hasCancel();
1258 
1259   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1260   //       parallel region to make cancellation barriers work properly.
1261   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1262   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1263   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1264                                     HasCancel, OutlinedHelperName);
1265   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1266   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1267 }
1268 
1269 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1270     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1271     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1272   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1273   return emitParallelOrTeamsOutlinedFunction(
1274       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1275 }
1276 
1277 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1278     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1279     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1280   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1281   return emitParallelOrTeamsOutlinedFunction(
1282       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1283 }
1284 
1285 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1286     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1287     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1288     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1289     bool Tied, unsigned &NumberOfParts) {
1290   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1291                                               PrePostActionTy &) {
1292     llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1293     llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1294     llvm::Value *TaskArgs[] = {
1295         UpLoc, ThreadID,
1296         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1297                                     TaskTVar->getType()->castAs<PointerType>())
1298             .getPointer(CGF)};
1299     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1300                             CGM.getModule(), OMPRTL___kmpc_omp_task),
1301                         TaskArgs);
1302   };
1303   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1304                                                             UntiedCodeGen);
1305   CodeGen.setAction(Action);
1306   assert(!ThreadIDVar->getType()->isPointerType() &&
1307          "thread id variable must be of type kmp_int32 for tasks");
1308   const OpenMPDirectiveKind Region =
1309       isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1310                                                       : OMPD_task;
1311   const CapturedStmt *CS = D.getCapturedStmt(Region);
1312   bool HasCancel = false;
1313   if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1314     HasCancel = TD->hasCancel();
1315   else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1316     HasCancel = TD->hasCancel();
1317   else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1318     HasCancel = TD->hasCancel();
1319   else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1320     HasCancel = TD->hasCancel();
1321 
1322   CodeGenFunction CGF(CGM, true);
1323   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1324                                         InnermostKind, HasCancel, Action);
1325   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1326   llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1327   if (!Tied)
1328     NumberOfParts = Action.getNumberOfParts();
1329   return Res;
1330 }
1331 
1332 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1333                                              bool AtCurrentPoint) {
1334   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1335   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1336 
1337   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1338   if (AtCurrentPoint) {
1339     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1340         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1341   } else {
1342     Elem.second.ServiceInsertPt =
1343         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1344     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1345   }
1346 }
1347 
1348 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1349   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1350   if (Elem.second.ServiceInsertPt) {
1351     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1352     Elem.second.ServiceInsertPt = nullptr;
1353     Ptr->eraseFromParent();
1354   }
1355 }
1356 
1357 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1358                                                   SourceLocation Loc,
1359                                                   SmallString<128> &Buffer) {
1360   llvm::raw_svector_ostream OS(Buffer);
1361   // Build debug location
1362   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1363   OS << ";" << PLoc.getFilename() << ";";
1364   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1365     OS << FD->getQualifiedNameAsString();
1366   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1367   return OS.str();
1368 }
1369 
1370 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1371                                                  SourceLocation Loc,
1372                                                  unsigned Flags) {
1373   uint32_t SrcLocStrSize;
1374   llvm::Constant *SrcLocStr;
1375   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1376       Loc.isInvalid()) {
1377     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1378   } else {
1379     std::string FunctionName;
1380     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1381       FunctionName = FD->getQualifiedNameAsString();
1382     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1383     const char *FileName = PLoc.getFilename();
1384     unsigned Line = PLoc.getLine();
1385     unsigned Column = PLoc.getColumn();
1386     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1387                                                 Column, SrcLocStrSize);
1388   }
1389   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1390   return OMPBuilder.getOrCreateIdent(
1391       SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1392 }
1393 
1394 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1395                                           SourceLocation Loc) {
1396   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1397   // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
1398   // the clang invariants used below might be broken.
1399   if (CGM.getLangOpts().OpenMPIRBuilder) {
1400     SmallString<128> Buffer;
1401     OMPBuilder.updateToLocation(CGF.Builder.saveIP());
1402     uint32_t SrcLocStrSize;
1403     auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1404         getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
1405     return OMPBuilder.getOrCreateThreadID(
1406         OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
1407   }
1408 
1409   llvm::Value *ThreadID = nullptr;
1410   // Check whether we've already cached a load of the thread id in this
1411   // function.
1412   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1413   if (I != OpenMPLocThreadIDMap.end()) {
1414     ThreadID = I->second.ThreadID;
1415     if (ThreadID != nullptr)
1416       return ThreadID;
1417   }
1418   // If exceptions are enabled, do not use parameter to avoid possible crash.
1419   if (auto *OMPRegionInfo =
1420           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1421     if (OMPRegionInfo->getThreadIDVariable()) {
1422       // Check if this an outlined function with thread id passed as argument.
1423       LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1424       llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1425       if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1426           !CGF.getLangOpts().CXXExceptions ||
1427           CGF.Builder.GetInsertBlock() == TopBlock ||
1428           !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1429           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1430               TopBlock ||
1431           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1432               CGF.Builder.GetInsertBlock()) {
1433         ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1434         // If value loaded in entry block, cache it and use it everywhere in
1435         // function.
1436         if (CGF.Builder.GetInsertBlock() == TopBlock) {
1437           auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1438           Elem.second.ThreadID = ThreadID;
1439         }
1440         return ThreadID;
1441       }
1442     }
1443   }
1444 
1445   // This is not an outlined function region - need to call __kmpc_int32
1446   // kmpc_global_thread_num(ident_t *loc).
1447   // Generate thread id value and cache this value for use across the
1448   // function.
1449   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1450   if (!Elem.second.ServiceInsertPt)
1451     setLocThreadIdInsertPt(CGF);
1452   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1453   CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1454   llvm::CallInst *Call = CGF.Builder.CreateCall(
1455       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1456                                             OMPRTL___kmpc_global_thread_num),
1457       emitUpdateLocation(CGF, Loc));
1458   Call->setCallingConv(CGF.getRuntimeCC());
1459   Elem.second.ThreadID = Call;
1460   return Call;
1461 }
1462 
1463 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1464   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1465   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1466     clearLocThreadIdInsertPt(CGF);
1467     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1468   }
1469   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1470     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1471       UDRMap.erase(D);
1472     FunctionUDRMap.erase(CGF.CurFn);
1473   }
1474   auto I = FunctionUDMMap.find(CGF.CurFn);
1475   if (I != FunctionUDMMap.end()) {
1476     for(const auto *D : I->second)
1477       UDMMap.erase(D);
1478     FunctionUDMMap.erase(I);
1479   }
1480   LastprivateConditionalToTypes.erase(CGF.CurFn);
1481   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1482 }
1483 
1484 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1485   return OMPBuilder.IdentPtr;
1486 }
1487 
1488 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1489   if (!Kmpc_MicroTy) {
1490     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1491     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1492                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1493     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1494   }
1495   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1496 }
1497 
1498 llvm::FunctionCallee
1499 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1500                                              bool IsGPUDistribute) {
1501   assert((IVSize == 32 || IVSize == 64) &&
1502          "IV size is not compatible with the omp runtime");
1503   StringRef Name;
1504   if (IsGPUDistribute)
1505     Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1506                                     : "__kmpc_distribute_static_init_4u")
1507                         : (IVSigned ? "__kmpc_distribute_static_init_8"
1508                                     : "__kmpc_distribute_static_init_8u");
1509   else
1510     Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1511                                     : "__kmpc_for_static_init_4u")
1512                         : (IVSigned ? "__kmpc_for_static_init_8"
1513                                     : "__kmpc_for_static_init_8u");
1514 
1515   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1516   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1517   llvm::Type *TypeParams[] = {
1518     getIdentTyPointerTy(),                     // loc
1519     CGM.Int32Ty,                               // tid
1520     CGM.Int32Ty,                               // schedtype
1521     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1522     PtrTy,                                     // p_lower
1523     PtrTy,                                     // p_upper
1524     PtrTy,                                     // p_stride
1525     ITy,                                       // incr
1526     ITy                                        // chunk
1527   };
1528   auto *FnTy =
1529       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1530   return CGM.CreateRuntimeFunction(FnTy, Name);
1531 }
1532 
1533 llvm::FunctionCallee
1534 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1535   assert((IVSize == 32 || IVSize == 64) &&
1536          "IV size is not compatible with the omp runtime");
1537   StringRef Name =
1538       IVSize == 32
1539           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1540           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1541   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1542   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1543                                CGM.Int32Ty,           // tid
1544                                CGM.Int32Ty,           // schedtype
1545                                ITy,                   // lower
1546                                ITy,                   // upper
1547                                ITy,                   // stride
1548                                ITy                    // chunk
1549   };
1550   auto *FnTy =
1551       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1552   return CGM.CreateRuntimeFunction(FnTy, Name);
1553 }
1554 
1555 llvm::FunctionCallee
1556 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1557   assert((IVSize == 32 || IVSize == 64) &&
1558          "IV size is not compatible with the omp runtime");
1559   StringRef Name =
1560       IVSize == 32
1561           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1562           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1563   llvm::Type *TypeParams[] = {
1564       getIdentTyPointerTy(), // loc
1565       CGM.Int32Ty,           // tid
1566   };
1567   auto *FnTy =
1568       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1569   return CGM.CreateRuntimeFunction(FnTy, Name);
1570 }
1571 
1572 llvm::FunctionCallee
1573 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1574   assert((IVSize == 32 || IVSize == 64) &&
1575          "IV size is not compatible with the omp runtime");
1576   StringRef Name =
1577       IVSize == 32
1578           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1579           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1580   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1581   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1582   llvm::Type *TypeParams[] = {
1583     getIdentTyPointerTy(),                     // loc
1584     CGM.Int32Ty,                               // tid
1585     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1586     PtrTy,                                     // p_lower
1587     PtrTy,                                     // p_upper
1588     PtrTy                                      // p_stride
1589   };
1590   auto *FnTy =
1591       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1592   return CGM.CreateRuntimeFunction(FnTy, Name);
1593 }
1594 
1595 /// Obtain information that uniquely identifies a target entry. This
1596 /// consists of the file and device IDs as well as line number associated with
1597 /// the relevant entry source location.
1598 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1599                                      unsigned &DeviceID, unsigned &FileID,
1600                                      unsigned &LineNum) {
1601   SourceManager &SM = C.getSourceManager();
1602 
1603   // The loc should be always valid and have a file ID (the user cannot use
1604   // #pragma directives in macros)
1605 
1606   assert(Loc.isValid() && "Source location is expected to be always valid.");
1607 
1608   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1609   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1610 
1611   llvm::sys::fs::UniqueID ID;
1612   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1613     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1614     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1615     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1616       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1617           << PLoc.getFilename() << EC.message();
1618   }
1619 
1620   DeviceID = ID.getDevice();
1621   FileID = ID.getFile();
1622   LineNum = PLoc.getLine();
1623 }
1624 
1625 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1626   if (CGM.getLangOpts().OpenMPSimd)
1627     return Address::invalid();
1628   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1629       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1630   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
1631               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1632                HasRequiresUnifiedSharedMemory))) {
1633     SmallString<64> PtrName;
1634     {
1635       llvm::raw_svector_ostream OS(PtrName);
1636       OS << CGM.getMangledName(GlobalDecl(VD));
1637       if (!VD->isExternallyVisible()) {
1638         unsigned DeviceID, FileID, Line;
1639         getTargetEntryUniqueInfo(CGM.getContext(),
1640                                  VD->getCanonicalDecl()->getBeginLoc(),
1641                                  DeviceID, FileID, Line);
1642         OS << llvm::format("_%x", FileID);
1643       }
1644       OS << "_decl_tgt_ref_ptr";
1645     }
1646     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
1647     QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
1648     llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(PtrTy);
1649     if (!Ptr) {
1650       Ptr = getOrCreateInternalVariable(LlvmPtrTy, PtrName);
1651 
1652       auto *GV = cast<llvm::GlobalVariable>(Ptr);
1653       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
1654 
1655       if (!CGM.getLangOpts().OpenMPIsDevice)
1656         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
1657       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
1658     }
1659     return Address(Ptr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1660   }
1661   return Address::invalid();
1662 }
1663 
1664 llvm::Constant *
1665 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1666   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1667          !CGM.getContext().getTargetInfo().isTLSSupported());
1668   // Lookup the entry, lazily creating it if necessary.
1669   std::string Suffix = getName({"cache", ""});
1670   return getOrCreateInternalVariable(
1671       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1672 }
1673 
1674 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1675                                                 const VarDecl *VD,
1676                                                 Address VDAddr,
1677                                                 SourceLocation Loc) {
1678   if (CGM.getLangOpts().OpenMPUseTLS &&
1679       CGM.getContext().getTargetInfo().isTLSSupported())
1680     return VDAddr;
1681 
1682   llvm::Type *VarTy = VDAddr.getElementType();
1683   llvm::Value *Args[] = {
1684       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1685       CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
1686       CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1687       getOrCreateThreadPrivateCache(VD)};
1688   return Address(
1689       CGF.EmitRuntimeCall(
1690           OMPBuilder.getOrCreateRuntimeFunction(
1691               CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1692           Args),
1693       CGF.Int8Ty, VDAddr.getAlignment());
1694 }
1695 
1696 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1697     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1698     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1699   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1700   // library.
1701   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1702   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1703                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1704                       OMPLoc);
1705   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1706   // to register constructor/destructor for variable.
1707   llvm::Value *Args[] = {
1708       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1709       Ctor, CopyCtor, Dtor};
1710   CGF.EmitRuntimeCall(
1711       OMPBuilder.getOrCreateRuntimeFunction(
1712           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1713       Args);
1714 }
1715 
1716 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1717     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1718     bool PerformInit, CodeGenFunction *CGF) {
1719   if (CGM.getLangOpts().OpenMPUseTLS &&
1720       CGM.getContext().getTargetInfo().isTLSSupported())
1721     return nullptr;
1722 
1723   VD = VD->getDefinition(CGM.getContext());
1724   if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1725     QualType ASTTy = VD->getType();
1726 
1727     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1728     const Expr *Init = VD->getAnyInitializer();
1729     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1730       // Generate function that re-emits the declaration's initializer into the
1731       // threadprivate copy of the variable VD
1732       CodeGenFunction CtorCGF(CGM);
1733       FunctionArgList Args;
1734       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1735                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1736                             ImplicitParamDecl::Other);
1737       Args.push_back(&Dst);
1738 
1739       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1740           CGM.getContext().VoidPtrTy, Args);
1741       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1742       std::string Name = getName({"__kmpc_global_ctor_", ""});
1743       llvm::Function *Fn =
1744           CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1745       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1746                             Args, Loc, Loc);
1747       llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1748           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1749           CGM.getContext().VoidPtrTy, Dst.getLocation());
1750       Address Arg(ArgVal, CtorCGF.Int8Ty, VDAddr.getAlignment());
1751       Arg = CtorCGF.Builder.CreateElementBitCast(
1752           Arg, CtorCGF.ConvertTypeForMem(ASTTy));
1753       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1754                                /*IsInitializer=*/true);
1755       ArgVal = CtorCGF.EmitLoadOfScalar(
1756           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1757           CGM.getContext().VoidPtrTy, Dst.getLocation());
1758       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1759       CtorCGF.FinishFunction();
1760       Ctor = Fn;
1761     }
1762     if (VD->getType().isDestructedType() != QualType::DK_none) {
1763       // Generate function that emits destructor call for the threadprivate copy
1764       // of the variable VD
1765       CodeGenFunction DtorCGF(CGM);
1766       FunctionArgList Args;
1767       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1768                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1769                             ImplicitParamDecl::Other);
1770       Args.push_back(&Dst);
1771 
1772       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1773           CGM.getContext().VoidTy, Args);
1774       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1775       std::string Name = getName({"__kmpc_global_dtor_", ""});
1776       llvm::Function *Fn =
1777           CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1778       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1779       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1780                             Loc, Loc);
1781       // Create a scope with an artificial location for the body of this function.
1782       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1783       llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1784           DtorCGF.GetAddrOfLocalVar(&Dst),
1785           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1786       DtorCGF.emitDestroy(
1787           Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1788           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1789           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1790       DtorCGF.FinishFunction();
1791       Dtor = Fn;
1792     }
1793     // Do not emit init function if it is not required.
1794     if (!Ctor && !Dtor)
1795       return nullptr;
1796 
1797     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1798     auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1799                                                /*isVarArg=*/false)
1800                            ->getPointerTo();
1801     // Copying constructor for the threadprivate variable.
1802     // Must be NULL - reserved by runtime, but currently it requires that this
1803     // parameter is always NULL. Otherwise it fires assertion.
1804     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1805     if (Ctor == nullptr) {
1806       auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1807                                              /*isVarArg=*/false)
1808                          ->getPointerTo();
1809       Ctor = llvm::Constant::getNullValue(CtorTy);
1810     }
1811     if (Dtor == nullptr) {
1812       auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1813                                              /*isVarArg=*/false)
1814                          ->getPointerTo();
1815       Dtor = llvm::Constant::getNullValue(DtorTy);
1816     }
1817     if (!CGF) {
1818       auto *InitFunctionTy =
1819           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1820       std::string Name = getName({"__omp_threadprivate_init_", ""});
1821       llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1822           InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1823       CodeGenFunction InitCGF(CGM);
1824       FunctionArgList ArgList;
1825       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1826                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
1827                             Loc, Loc);
1828       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1829       InitCGF.FinishFunction();
1830       return InitFunction;
1831     }
1832     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1833   }
1834   return nullptr;
1835 }
1836 
1837 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
1838                                                      llvm::GlobalVariable *Addr,
1839                                                      bool PerformInit) {
1840   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
1841       !CGM.getLangOpts().OpenMPIsDevice)
1842     return false;
1843   Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1844       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1845   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
1846       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1847        HasRequiresUnifiedSharedMemory))
1848     return CGM.getLangOpts().OpenMPIsDevice;
1849   VD = VD->getDefinition(CGM.getContext());
1850   assert(VD && "Unknown VarDecl");
1851 
1852   if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
1853     return CGM.getLangOpts().OpenMPIsDevice;
1854 
1855   QualType ASTTy = VD->getType();
1856   SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
1857 
1858   // Produce the unique prefix to identify the new target regions. We use
1859   // the source location of the variable declaration which we know to not
1860   // conflict with any target region.
1861   unsigned DeviceID;
1862   unsigned FileID;
1863   unsigned Line;
1864   getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
1865   SmallString<128> Buffer, Out;
1866   {
1867     llvm::raw_svector_ostream OS(Buffer);
1868     OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
1869        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
1870   }
1871 
1872   const Expr *Init = VD->getAnyInitializer();
1873   if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1874     llvm::Constant *Ctor;
1875     llvm::Constant *ID;
1876     if (CGM.getLangOpts().OpenMPIsDevice) {
1877       // Generate function that re-emits the declaration's initializer into
1878       // the threadprivate copy of the variable VD
1879       CodeGenFunction CtorCGF(CGM);
1880 
1881       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1882       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1883       llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1884           FTy, Twine(Buffer, "_ctor"), FI, Loc, false,
1885           llvm::GlobalValue::WeakODRLinkage);
1886       if (CGM.getTriple().isAMDGCN())
1887         Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
1888       auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
1889       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1890                             FunctionArgList(), Loc, Loc);
1891       auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
1892       llvm::Constant *AddrInAS0 = Addr;
1893       if (Addr->getAddressSpace() != 0)
1894         AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
1895             Addr, llvm::PointerType::getWithSamePointeeType(
1896                       cast<llvm::PointerType>(Addr->getType()), 0));
1897       CtorCGF.EmitAnyExprToMem(Init,
1898                                Address(AddrInAS0, Addr->getValueType(),
1899                                        CGM.getContext().getDeclAlign(VD)),
1900                                Init->getType().getQualifiers(),
1901                                /*IsInitializer=*/true);
1902       CtorCGF.FinishFunction();
1903       Ctor = Fn;
1904       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
1905     } else {
1906       Ctor = new llvm::GlobalVariable(
1907           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1908           llvm::GlobalValue::PrivateLinkage,
1909           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
1910       ID = Ctor;
1911     }
1912 
1913     // Register the information for the entry associated with the constructor.
1914     Out.clear();
1915     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
1916         DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
1917         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
1918   }
1919   if (VD->getType().isDestructedType() != QualType::DK_none) {
1920     llvm::Constant *Dtor;
1921     llvm::Constant *ID;
1922     if (CGM.getLangOpts().OpenMPIsDevice) {
1923       // Generate function that emits destructor call for the threadprivate
1924       // copy of the variable VD
1925       CodeGenFunction DtorCGF(CGM);
1926 
1927       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1928       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1929       llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1930           FTy, Twine(Buffer, "_dtor"), FI, Loc, false,
1931           llvm::GlobalValue::WeakODRLinkage);
1932       if (CGM.getTriple().isAMDGCN())
1933         Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
1934       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1935       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1936                             FunctionArgList(), Loc, Loc);
1937       // Create a scope with an artificial location for the body of this
1938       // function.
1939       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1940       llvm::Constant *AddrInAS0 = Addr;
1941       if (Addr->getAddressSpace() != 0)
1942         AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
1943             Addr, llvm::PointerType::getWithSamePointeeType(
1944                       cast<llvm::PointerType>(Addr->getType()), 0));
1945       DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(),
1946                                   CGM.getContext().getDeclAlign(VD)),
1947                           ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1948                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1949       DtorCGF.FinishFunction();
1950       Dtor = Fn;
1951       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
1952     } else {
1953       Dtor = new llvm::GlobalVariable(
1954           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1955           llvm::GlobalValue::PrivateLinkage,
1956           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
1957       ID = Dtor;
1958     }
1959     // Register the information for the entry associated with the destructor.
1960     Out.clear();
1961     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
1962         DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
1963         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
1964   }
1965   return CGM.getLangOpts().OpenMPIsDevice;
1966 }
1967 
1968 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1969                                                           QualType VarType,
1970                                                           StringRef Name) {
1971   std::string Suffix = getName({"artificial", ""});
1972   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1973   llvm::GlobalVariable *GAddr =
1974       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
1975   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1976       CGM.getTarget().isTLSSupported()) {
1977     GAddr->setThreadLocal(/*Val=*/true);
1978     return Address(GAddr, GAddr->getValueType(),
1979                    CGM.getContext().getTypeAlignInChars(VarType));
1980   }
1981   std::string CacheSuffix = getName({"cache", ""});
1982   llvm::Value *Args[] = {
1983       emitUpdateLocation(CGF, SourceLocation()),
1984       getThreadID(CGF, SourceLocation()),
1985       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
1986       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1987                                 /*isSigned=*/false),
1988       getOrCreateInternalVariable(
1989           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
1990   return Address(
1991       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1992           CGF.EmitRuntimeCall(
1993               OMPBuilder.getOrCreateRuntimeFunction(
1994                   CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1995               Args),
1996           VarLVType->getPointerTo(/*AddrSpace=*/0)),
1997       VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1998 }
1999 
2000 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2001                                    const RegionCodeGenTy &ThenGen,
2002                                    const RegionCodeGenTy &ElseGen) {
2003   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2004 
2005   // If the condition constant folds and can be elided, try to avoid emitting
2006   // the condition and the dead arm of the if/else.
2007   bool CondConstant;
2008   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2009     if (CondConstant)
2010       ThenGen(CGF);
2011     else
2012       ElseGen(CGF);
2013     return;
2014   }
2015 
2016   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2017   // emit the conditional branch.
2018   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2019   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2020   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2021   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2022 
2023   // Emit the 'then' code.
2024   CGF.EmitBlock(ThenBlock);
2025   ThenGen(CGF);
2026   CGF.EmitBranch(ContBlock);
2027   // Emit the 'else' code if present.
2028   // There is no need to emit line number for unconditional branch.
2029   (void)ApplyDebugLocation::CreateEmpty(CGF);
2030   CGF.EmitBlock(ElseBlock);
2031   ElseGen(CGF);
2032   // There is no need to emit line number for unconditional branch.
2033   (void)ApplyDebugLocation::CreateEmpty(CGF);
2034   CGF.EmitBranch(ContBlock);
2035   // Emit the continuation block for code after the if.
2036   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2037 }
2038 
2039 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2040                                        llvm::Function *OutlinedFn,
2041                                        ArrayRef<llvm::Value *> CapturedVars,
2042                                        const Expr *IfCond,
2043                                        llvm::Value *NumThreads) {
2044   if (!CGF.HaveInsertPoint())
2045     return;
2046   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2047   auto &M = CGM.getModule();
2048   auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2049                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2050     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2051     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2052     llvm::Value *Args[] = {
2053         RTLoc,
2054         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2055         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2056     llvm::SmallVector<llvm::Value *, 16> RealArgs;
2057     RealArgs.append(std::begin(Args), std::end(Args));
2058     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2059 
2060     llvm::FunctionCallee RTLFn =
2061         OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2062     CGF.EmitRuntimeCall(RTLFn, RealArgs);
2063   };
2064   auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2065                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2066     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2067     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2068     // Build calls:
2069     // __kmpc_serialized_parallel(&Loc, GTid);
2070     llvm::Value *Args[] = {RTLoc, ThreadID};
2071     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2072                             M, OMPRTL___kmpc_serialized_parallel),
2073                         Args);
2074 
2075     // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2076     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2077     Address ZeroAddrBound =
2078         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2079                                          /*Name=*/".bound.zero.addr");
2080     CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
2081     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2082     // ThreadId for serialized parallels is 0.
2083     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2084     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2085     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2086 
2087     // Ensure we do not inline the function. This is trivially true for the ones
2088     // passed to __kmpc_fork_call but the ones called in serialized regions
2089     // could be inlined. This is not a perfect but it is closer to the invariant
2090     // we want, namely, every data environment starts with a new function.
2091     // TODO: We should pass the if condition to the runtime function and do the
2092     //       handling there. Much cleaner code.
2093     OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
2094     OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
2095     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2096 
2097     // __kmpc_end_serialized_parallel(&Loc, GTid);
2098     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2099     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2100                             M, OMPRTL___kmpc_end_serialized_parallel),
2101                         EndArgs);
2102   };
2103   if (IfCond) {
2104     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2105   } else {
2106     RegionCodeGenTy ThenRCG(ThenGen);
2107     ThenRCG(CGF);
2108   }
2109 }
2110 
2111 // If we're inside an (outlined) parallel region, use the region info's
2112 // thread-ID variable (it is passed in a first argument of the outlined function
2113 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2114 // regular serial code region, get thread ID by calling kmp_int32
2115 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2116 // return the address of that temp.
2117 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2118                                              SourceLocation Loc) {
2119   if (auto *OMPRegionInfo =
2120           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2121     if (OMPRegionInfo->getThreadIDVariable())
2122       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2123 
2124   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2125   QualType Int32Ty =
2126       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2127   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2128   CGF.EmitStoreOfScalar(ThreadID,
2129                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2130 
2131   return ThreadIDTemp;
2132 }
2133 
2134 llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable(
2135     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2136   SmallString<256> Buffer;
2137   llvm::raw_svector_ostream Out(Buffer);
2138   Out << Name;
2139   StringRef RuntimeName = Out.str();
2140   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2141   if (Elem.second) {
2142     assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) &&
2143            "OMP internal variable has different type than requested");
2144     return &*Elem.second;
2145   }
2146 
2147   return Elem.second = new llvm::GlobalVariable(
2148              CGM.getModule(), Ty, /*IsConstant*/ false,
2149              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2150              Elem.first(), /*InsertBefore=*/nullptr,
2151              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2152 }
2153 
2154 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2155   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2156   std::string Name = getName({Prefix, "var"});
2157   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2158 }
2159 
2160 namespace {
2161 /// Common pre(post)-action for different OpenMP constructs.
2162 class CommonActionTy final : public PrePostActionTy {
2163   llvm::FunctionCallee EnterCallee;
2164   ArrayRef<llvm::Value *> EnterArgs;
2165   llvm::FunctionCallee ExitCallee;
2166   ArrayRef<llvm::Value *> ExitArgs;
2167   bool Conditional;
2168   llvm::BasicBlock *ContBlock = nullptr;
2169 
2170 public:
2171   CommonActionTy(llvm::FunctionCallee EnterCallee,
2172                  ArrayRef<llvm::Value *> EnterArgs,
2173                  llvm::FunctionCallee ExitCallee,
2174                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2175       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2176         ExitArgs(ExitArgs), Conditional(Conditional) {}
2177   void Enter(CodeGenFunction &CGF) override {
2178     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2179     if (Conditional) {
2180       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2181       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2182       ContBlock = CGF.createBasicBlock("omp_if.end");
2183       // Generate the branch (If-stmt)
2184       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2185       CGF.EmitBlock(ThenBlock);
2186     }
2187   }
2188   void Done(CodeGenFunction &CGF) {
2189     // Emit the rest of blocks/branches
2190     CGF.EmitBranch(ContBlock);
2191     CGF.EmitBlock(ContBlock, true);
2192   }
2193   void Exit(CodeGenFunction &CGF) override {
2194     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2195   }
2196 };
2197 } // anonymous namespace
2198 
2199 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2200                                          StringRef CriticalName,
2201                                          const RegionCodeGenTy &CriticalOpGen,
2202                                          SourceLocation Loc, const Expr *Hint) {
2203   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2204   // CriticalOpGen();
2205   // __kmpc_end_critical(ident_t *, gtid, Lock);
2206   // Prepare arguments and build a call to __kmpc_critical
2207   if (!CGF.HaveInsertPoint())
2208     return;
2209   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2210                          getCriticalRegionLock(CriticalName)};
2211   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2212                                                 std::end(Args));
2213   if (Hint) {
2214     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2215         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2216   }
2217   CommonActionTy Action(
2218       OMPBuilder.getOrCreateRuntimeFunction(
2219           CGM.getModule(),
2220           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2221       EnterArgs,
2222       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2223                                             OMPRTL___kmpc_end_critical),
2224       Args);
2225   CriticalOpGen.setAction(Action);
2226   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2227 }
2228 
2229 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2230                                        const RegionCodeGenTy &MasterOpGen,
2231                                        SourceLocation Loc) {
2232   if (!CGF.HaveInsertPoint())
2233     return;
2234   // if(__kmpc_master(ident_t *, gtid)) {
2235   //   MasterOpGen();
2236   //   __kmpc_end_master(ident_t *, gtid);
2237   // }
2238   // Prepare arguments and build a call to __kmpc_master
2239   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2240   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2241                             CGM.getModule(), OMPRTL___kmpc_master),
2242                         Args,
2243                         OMPBuilder.getOrCreateRuntimeFunction(
2244                             CGM.getModule(), OMPRTL___kmpc_end_master),
2245                         Args,
2246                         /*Conditional=*/true);
2247   MasterOpGen.setAction(Action);
2248   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2249   Action.Done(CGF);
2250 }
2251 
2252 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2253                                        const RegionCodeGenTy &MaskedOpGen,
2254                                        SourceLocation Loc, const Expr *Filter) {
2255   if (!CGF.HaveInsertPoint())
2256     return;
2257   // if(__kmpc_masked(ident_t *, gtid, filter)) {
2258   //   MaskedOpGen();
2259   //   __kmpc_end_masked(iden_t *, gtid);
2260   // }
2261   // Prepare arguments and build a call to __kmpc_masked
2262   llvm::Value *FilterVal = Filter
2263                                ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2264                                : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2265   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2266                          FilterVal};
2267   llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2268                             getThreadID(CGF, Loc)};
2269   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2270                             CGM.getModule(), OMPRTL___kmpc_masked),
2271                         Args,
2272                         OMPBuilder.getOrCreateRuntimeFunction(
2273                             CGM.getModule(), OMPRTL___kmpc_end_masked),
2274                         ArgsEnd,
2275                         /*Conditional=*/true);
2276   MaskedOpGen.setAction(Action);
2277   emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2278   Action.Done(CGF);
2279 }
2280 
2281 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2282                                         SourceLocation Loc) {
2283   if (!CGF.HaveInsertPoint())
2284     return;
2285   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2286     OMPBuilder.createTaskyield(CGF.Builder);
2287   } else {
2288     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2289     llvm::Value *Args[] = {
2290         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2291         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2292     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2293                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2294                         Args);
2295   }
2296 
2297   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2298     Region->emitUntiedSwitch(CGF);
2299 }
2300 
2301 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2302                                           const RegionCodeGenTy &TaskgroupOpGen,
2303                                           SourceLocation Loc) {
2304   if (!CGF.HaveInsertPoint())
2305     return;
2306   // __kmpc_taskgroup(ident_t *, gtid);
2307   // TaskgroupOpGen();
2308   // __kmpc_end_taskgroup(ident_t *, gtid);
2309   // Prepare arguments and build a call to __kmpc_taskgroup
2310   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2311   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2312                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2313                         Args,
2314                         OMPBuilder.getOrCreateRuntimeFunction(
2315                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2316                         Args);
2317   TaskgroupOpGen.setAction(Action);
2318   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2319 }
2320 
2321 /// Given an array of pointers to variables, project the address of a
2322 /// given variable.
2323 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2324                                       unsigned Index, const VarDecl *Var) {
2325   // Pull out the pointer to the variable.
2326   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2327   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2328 
2329   llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2330   return Address(
2331       CGF.Builder.CreateBitCast(
2332           Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
2333       ElemTy, CGF.getContext().getDeclAlign(Var));
2334 }
2335 
2336 static llvm::Value *emitCopyprivateCopyFunction(
2337     CodeGenModule &CGM, llvm::Type *ArgsElemType,
2338     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2339     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2340     SourceLocation Loc) {
2341   ASTContext &C = CGM.getContext();
2342   // void copy_func(void *LHSArg, void *RHSArg);
2343   FunctionArgList Args;
2344   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2345                            ImplicitParamDecl::Other);
2346   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2347                            ImplicitParamDecl::Other);
2348   Args.push_back(&LHSArg);
2349   Args.push_back(&RHSArg);
2350   const auto &CGFI =
2351       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2352   std::string Name =
2353       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2354   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2355                                     llvm::GlobalValue::InternalLinkage, Name,
2356                                     &CGM.getModule());
2357   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2358   Fn->setDoesNotRecurse();
2359   CodeGenFunction CGF(CGM);
2360   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2361   // Dest = (void*[n])(LHSArg);
2362   // Src = (void*[n])(RHSArg);
2363   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2364                   CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2365                   ArgsElemType->getPointerTo()),
2366               ArgsElemType, CGF.getPointerAlign());
2367   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2368                   CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2369                   ArgsElemType->getPointerTo()),
2370               ArgsElemType, CGF.getPointerAlign());
2371   // *(Type0*)Dst[0] = *(Type0*)Src[0];
2372   // *(Type1*)Dst[1] = *(Type1*)Src[1];
2373   // ...
2374   // *(Typen*)Dst[n] = *(Typen*)Src[n];
2375   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2376     const auto *DestVar =
2377         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2378     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2379 
2380     const auto *SrcVar =
2381         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2382     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2383 
2384     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2385     QualType Type = VD->getType();
2386     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2387   }
2388   CGF.FinishFunction();
2389   return Fn;
2390 }
2391 
2392 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2393                                        const RegionCodeGenTy &SingleOpGen,
2394                                        SourceLocation Loc,
2395                                        ArrayRef<const Expr *> CopyprivateVars,
2396                                        ArrayRef<const Expr *> SrcExprs,
2397                                        ArrayRef<const Expr *> DstExprs,
2398                                        ArrayRef<const Expr *> AssignmentOps) {
2399   if (!CGF.HaveInsertPoint())
2400     return;
2401   assert(CopyprivateVars.size() == SrcExprs.size() &&
2402          CopyprivateVars.size() == DstExprs.size() &&
2403          CopyprivateVars.size() == AssignmentOps.size());
2404   ASTContext &C = CGM.getContext();
2405   // int32 did_it = 0;
2406   // if(__kmpc_single(ident_t *, gtid)) {
2407   //   SingleOpGen();
2408   //   __kmpc_end_single(ident_t *, gtid);
2409   //   did_it = 1;
2410   // }
2411   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2412   // <copy_func>, did_it);
2413 
2414   Address DidIt = Address::invalid();
2415   if (!CopyprivateVars.empty()) {
2416     // int32 did_it = 0;
2417     QualType KmpInt32Ty =
2418         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2419     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2420     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2421   }
2422   // Prepare arguments and build a call to __kmpc_single
2423   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2424   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2425                             CGM.getModule(), OMPRTL___kmpc_single),
2426                         Args,
2427                         OMPBuilder.getOrCreateRuntimeFunction(
2428                             CGM.getModule(), OMPRTL___kmpc_end_single),
2429                         Args,
2430                         /*Conditional=*/true);
2431   SingleOpGen.setAction(Action);
2432   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2433   if (DidIt.isValid()) {
2434     // did_it = 1;
2435     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2436   }
2437   Action.Done(CGF);
2438   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2439   // <copy_func>, did_it);
2440   if (DidIt.isValid()) {
2441     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2442     QualType CopyprivateArrayTy = C.getConstantArrayType(
2443         C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2444         /*IndexTypeQuals=*/0);
2445     // Create a list of all private variables for copyprivate.
2446     Address CopyprivateList =
2447         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2448     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2449       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2450       CGF.Builder.CreateStore(
2451           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2452               CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2453               CGF.VoidPtrTy),
2454           Elem);
2455     }
2456     // Build function that copies private values from single region to all other
2457     // threads in the corresponding parallel region.
2458     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2459         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2460         SrcExprs, DstExprs, AssignmentOps, Loc);
2461     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2462     Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2463         CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2464     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2465     llvm::Value *Args[] = {
2466         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2467         getThreadID(CGF, Loc),        // i32 <gtid>
2468         BufSize,                      // size_t <buf_size>
2469         CL.getPointer(),              // void *<copyprivate list>
2470         CpyFn,                        // void (*) (void *, void *) <copy_func>
2471         DidItVal                      // i32 did_it
2472     };
2473     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2474                             CGM.getModule(), OMPRTL___kmpc_copyprivate),
2475                         Args);
2476   }
2477 }
2478 
2479 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2480                                         const RegionCodeGenTy &OrderedOpGen,
2481                                         SourceLocation Loc, bool IsThreads) {
2482   if (!CGF.HaveInsertPoint())
2483     return;
2484   // __kmpc_ordered(ident_t *, gtid);
2485   // OrderedOpGen();
2486   // __kmpc_end_ordered(ident_t *, gtid);
2487   // Prepare arguments and build a call to __kmpc_ordered
2488   if (IsThreads) {
2489     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2490     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2491                               CGM.getModule(), OMPRTL___kmpc_ordered),
2492                           Args,
2493                           OMPBuilder.getOrCreateRuntimeFunction(
2494                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2495                           Args);
2496     OrderedOpGen.setAction(Action);
2497     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2498     return;
2499   }
2500   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2501 }
2502 
2503 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2504   unsigned Flags;
2505   if (Kind == OMPD_for)
2506     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2507   else if (Kind == OMPD_sections)
2508     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2509   else if (Kind == OMPD_single)
2510     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2511   else if (Kind == OMPD_barrier)
2512     Flags = OMP_IDENT_BARRIER_EXPL;
2513   else
2514     Flags = OMP_IDENT_BARRIER_IMPL;
2515   return Flags;
2516 }
2517 
2518 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2519     CodeGenFunction &CGF, const OMPLoopDirective &S,
2520     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2521   // Check if the loop directive is actually a doacross loop directive. In this
2522   // case choose static, 1 schedule.
2523   if (llvm::any_of(
2524           S.getClausesOfKind<OMPOrderedClause>(),
2525           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2526     ScheduleKind = OMPC_SCHEDULE_static;
2527     // Chunk size is 1 in this case.
2528     llvm::APInt ChunkSize(32, 1);
2529     ChunkExpr = IntegerLiteral::Create(
2530         CGF.getContext(), ChunkSize,
2531         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2532         SourceLocation());
2533   }
2534 }
2535 
2536 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2537                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2538                                       bool ForceSimpleCall) {
2539   // Check if we should use the OMPBuilder
2540   auto *OMPRegionInfo =
2541       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2542   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2543     CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2544         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2545     return;
2546   }
2547 
2548   if (!CGF.HaveInsertPoint())
2549     return;
2550   // Build call __kmpc_cancel_barrier(loc, thread_id);
2551   // Build call __kmpc_barrier(loc, thread_id);
2552   unsigned Flags = getDefaultFlagsForBarriers(Kind);
2553   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2554   // thread_id);
2555   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2556                          getThreadID(CGF, Loc)};
2557   if (OMPRegionInfo) {
2558     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2559       llvm::Value *Result = CGF.EmitRuntimeCall(
2560           OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2561                                                 OMPRTL___kmpc_cancel_barrier),
2562           Args);
2563       if (EmitChecks) {
2564         // if (__kmpc_cancel_barrier()) {
2565         //   exit from construct;
2566         // }
2567         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2568         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2569         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2570         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2571         CGF.EmitBlock(ExitBB);
2572         //   exit from construct;
2573         CodeGenFunction::JumpDest CancelDestination =
2574             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2575         CGF.EmitBranchThroughCleanup(CancelDestination);
2576         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2577       }
2578       return;
2579     }
2580   }
2581   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2582                           CGM.getModule(), OMPRTL___kmpc_barrier),
2583                       Args);
2584 }
2585 
2586 /// Map the OpenMP loop schedule to the runtime enumeration.
2587 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2588                                           bool Chunked, bool Ordered) {
2589   switch (ScheduleKind) {
2590   case OMPC_SCHEDULE_static:
2591     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2592                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2593   case OMPC_SCHEDULE_dynamic:
2594     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2595   case OMPC_SCHEDULE_guided:
2596     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2597   case OMPC_SCHEDULE_runtime:
2598     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2599   case OMPC_SCHEDULE_auto:
2600     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2601   case OMPC_SCHEDULE_unknown:
2602     assert(!Chunked && "chunk was specified but schedule kind not known");
2603     return Ordered ? OMP_ord_static : OMP_sch_static;
2604   }
2605   llvm_unreachable("Unexpected runtime schedule");
2606 }
2607 
2608 /// Map the OpenMP distribute schedule to the runtime enumeration.
2609 static OpenMPSchedType
2610 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2611   // only static is allowed for dist_schedule
2612   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2613 }
2614 
2615 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2616                                          bool Chunked) const {
2617   OpenMPSchedType Schedule =
2618       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2619   return Schedule == OMP_sch_static;
2620 }
2621 
2622 bool CGOpenMPRuntime::isStaticNonchunked(
2623     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2624   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2625   return Schedule == OMP_dist_sch_static;
2626 }
2627 
2628 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2629                                       bool Chunked) const {
2630   OpenMPSchedType Schedule =
2631       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2632   return Schedule == OMP_sch_static_chunked;
2633 }
2634 
2635 bool CGOpenMPRuntime::isStaticChunked(
2636     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2637   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2638   return Schedule == OMP_dist_sch_static_chunked;
2639 }
2640 
2641 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2642   OpenMPSchedType Schedule =
2643       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2644   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2645   return Schedule != OMP_sch_static;
2646 }
2647 
2648 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2649                                   OpenMPScheduleClauseModifier M1,
2650                                   OpenMPScheduleClauseModifier M2) {
2651   int Modifier = 0;
2652   switch (M1) {
2653   case OMPC_SCHEDULE_MODIFIER_monotonic:
2654     Modifier = OMP_sch_modifier_monotonic;
2655     break;
2656   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2657     Modifier = OMP_sch_modifier_nonmonotonic;
2658     break;
2659   case OMPC_SCHEDULE_MODIFIER_simd:
2660     if (Schedule == OMP_sch_static_chunked)
2661       Schedule = OMP_sch_static_balanced_chunked;
2662     break;
2663   case OMPC_SCHEDULE_MODIFIER_last:
2664   case OMPC_SCHEDULE_MODIFIER_unknown:
2665     break;
2666   }
2667   switch (M2) {
2668   case OMPC_SCHEDULE_MODIFIER_monotonic:
2669     Modifier = OMP_sch_modifier_monotonic;
2670     break;
2671   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2672     Modifier = OMP_sch_modifier_nonmonotonic;
2673     break;
2674   case OMPC_SCHEDULE_MODIFIER_simd:
2675     if (Schedule == OMP_sch_static_chunked)
2676       Schedule = OMP_sch_static_balanced_chunked;
2677     break;
2678   case OMPC_SCHEDULE_MODIFIER_last:
2679   case OMPC_SCHEDULE_MODIFIER_unknown:
2680     break;
2681   }
2682   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2683   // If the static schedule kind is specified or if the ordered clause is
2684   // specified, and if the nonmonotonic modifier is not specified, the effect is
2685   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2686   // modifier is specified, the effect is as if the nonmonotonic modifier is
2687   // specified.
2688   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2689     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2690           Schedule == OMP_sch_static_balanced_chunked ||
2691           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2692           Schedule == OMP_dist_sch_static_chunked ||
2693           Schedule == OMP_dist_sch_static))
2694       Modifier = OMP_sch_modifier_nonmonotonic;
2695   }
2696   return Schedule | Modifier;
2697 }
2698 
2699 void CGOpenMPRuntime::emitForDispatchInit(
2700     CodeGenFunction &CGF, SourceLocation Loc,
2701     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2702     bool Ordered, const DispatchRTInput &DispatchValues) {
2703   if (!CGF.HaveInsertPoint())
2704     return;
2705   OpenMPSchedType Schedule = getRuntimeSchedule(
2706       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2707   assert(Ordered ||
2708          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2709           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2710           Schedule != OMP_sch_static_balanced_chunked));
2711   // Call __kmpc_dispatch_init(
2712   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2713   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2714   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2715 
2716   // If the Chunk was not specified in the clause - use default value 1.
2717   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2718                                             : CGF.Builder.getIntN(IVSize, 1);
2719   llvm::Value *Args[] = {
2720       emitUpdateLocation(CGF, Loc),
2721       getThreadID(CGF, Loc),
2722       CGF.Builder.getInt32(addMonoNonMonoModifier(
2723           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2724       DispatchValues.LB,                                     // Lower
2725       DispatchValues.UB,                                     // Upper
2726       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2727       Chunk                                                  // Chunk
2728   };
2729   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2730 }
2731 
2732 static void emitForStaticInitCall(
2733     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2734     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2735     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2736     const CGOpenMPRuntime::StaticRTInput &Values) {
2737   if (!CGF.HaveInsertPoint())
2738     return;
2739 
2740   assert(!Values.Ordered);
2741   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2742          Schedule == OMP_sch_static_balanced_chunked ||
2743          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2744          Schedule == OMP_dist_sch_static ||
2745          Schedule == OMP_dist_sch_static_chunked);
2746 
2747   // Call __kmpc_for_static_init(
2748   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2749   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2750   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2751   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2752   llvm::Value *Chunk = Values.Chunk;
2753   if (Chunk == nullptr) {
2754     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2755             Schedule == OMP_dist_sch_static) &&
2756            "expected static non-chunked schedule");
2757     // If the Chunk was not specified in the clause - use default value 1.
2758     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2759   } else {
2760     assert((Schedule == OMP_sch_static_chunked ||
2761             Schedule == OMP_sch_static_balanced_chunked ||
2762             Schedule == OMP_ord_static_chunked ||
2763             Schedule == OMP_dist_sch_static_chunked) &&
2764            "expected static chunked schedule");
2765   }
2766   llvm::Value *Args[] = {
2767       UpdateLocation,
2768       ThreadId,
2769       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2770                                                   M2)), // Schedule type
2771       Values.IL.getPointer(),                           // &isLastIter
2772       Values.LB.getPointer(),                           // &LB
2773       Values.UB.getPointer(),                           // &UB
2774       Values.ST.getPointer(),                           // &Stride
2775       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2776       Chunk                                             // Chunk
2777   };
2778   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2779 }
2780 
2781 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2782                                         SourceLocation Loc,
2783                                         OpenMPDirectiveKind DKind,
2784                                         const OpenMPScheduleTy &ScheduleKind,
2785                                         const StaticRTInput &Values) {
2786   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2787       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2788   assert(isOpenMPWorksharingDirective(DKind) &&
2789          "Expected loop-based or sections-based directive.");
2790   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2791                                              isOpenMPLoopDirective(DKind)
2792                                                  ? OMP_IDENT_WORK_LOOP
2793                                                  : OMP_IDENT_WORK_SECTIONS);
2794   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2795   llvm::FunctionCallee StaticInitFunction =
2796       createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
2797   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2798   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2799                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2800 }
2801 
2802 void CGOpenMPRuntime::emitDistributeStaticInit(
2803     CodeGenFunction &CGF, SourceLocation Loc,
2804     OpenMPDistScheduleClauseKind SchedKind,
2805     const CGOpenMPRuntime::StaticRTInput &Values) {
2806   OpenMPSchedType ScheduleNum =
2807       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2808   llvm::Value *UpdatedLocation =
2809       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2810   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2811   llvm::FunctionCallee StaticInitFunction;
2812   bool isGPUDistribute =
2813       CGM.getLangOpts().OpenMPIsDevice &&
2814       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2815   StaticInitFunction = createForStaticInitFunction(
2816       Values.IVSize, Values.IVSigned, isGPUDistribute);
2817 
2818   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2819                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2820                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2821 }
2822 
2823 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2824                                           SourceLocation Loc,
2825                                           OpenMPDirectiveKind DKind) {
2826   if (!CGF.HaveInsertPoint())
2827     return;
2828   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2829   llvm::Value *Args[] = {
2830       emitUpdateLocation(CGF, Loc,
2831                          isOpenMPDistributeDirective(DKind)
2832                              ? OMP_IDENT_WORK_DISTRIBUTE
2833                              : isOpenMPLoopDirective(DKind)
2834                                    ? OMP_IDENT_WORK_LOOP
2835                                    : OMP_IDENT_WORK_SECTIONS),
2836       getThreadID(CGF, Loc)};
2837   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2838   if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2839       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2840     CGF.EmitRuntimeCall(
2841         OMPBuilder.getOrCreateRuntimeFunction(
2842             CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2843         Args);
2844   else
2845     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2846                             CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2847                         Args);
2848 }
2849 
2850 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2851                                                  SourceLocation Loc,
2852                                                  unsigned IVSize,
2853                                                  bool IVSigned) {
2854   if (!CGF.HaveInsertPoint())
2855     return;
2856   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2857   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2858   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2859 }
2860 
2861 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2862                                           SourceLocation Loc, unsigned IVSize,
2863                                           bool IVSigned, Address IL,
2864                                           Address LB, Address UB,
2865                                           Address ST) {
2866   // Call __kmpc_dispatch_next(
2867   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2868   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2869   //          kmp_int[32|64] *p_stride);
2870   llvm::Value *Args[] = {
2871       emitUpdateLocation(CGF, Loc),
2872       getThreadID(CGF, Loc),
2873       IL.getPointer(), // &isLastIter
2874       LB.getPointer(), // &Lower
2875       UB.getPointer(), // &Upper
2876       ST.getPointer()  // &Stride
2877   };
2878   llvm::Value *Call =
2879       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2880   return CGF.EmitScalarConversion(
2881       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2882       CGF.getContext().BoolTy, Loc);
2883 }
2884 
2885 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2886                                            llvm::Value *NumThreads,
2887                                            SourceLocation Loc) {
2888   if (!CGF.HaveInsertPoint())
2889     return;
2890   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2891   llvm::Value *Args[] = {
2892       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2893       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2894   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2895                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2896                       Args);
2897 }
2898 
2899 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2900                                          ProcBindKind ProcBind,
2901                                          SourceLocation Loc) {
2902   if (!CGF.HaveInsertPoint())
2903     return;
2904   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2905   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2906   llvm::Value *Args[] = {
2907       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2908       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2909   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2910                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2911                       Args);
2912 }
2913 
2914 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2915                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2916   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2917     OMPBuilder.createFlush(CGF.Builder);
2918   } else {
2919     if (!CGF.HaveInsertPoint())
2920       return;
2921     // Build call void __kmpc_flush(ident_t *loc)
2922     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2923                             CGM.getModule(), OMPRTL___kmpc_flush),
2924                         emitUpdateLocation(CGF, Loc));
2925   }
2926 }
2927 
2928 namespace {
2929 /// Indexes of fields for type kmp_task_t.
2930 enum KmpTaskTFields {
2931   /// List of shared variables.
2932   KmpTaskTShareds,
2933   /// Task routine.
2934   KmpTaskTRoutine,
2935   /// Partition id for the untied tasks.
2936   KmpTaskTPartId,
2937   /// Function with call of destructors for private variables.
2938   Data1,
2939   /// Task priority.
2940   Data2,
2941   /// (Taskloops only) Lower bound.
2942   KmpTaskTLowerBound,
2943   /// (Taskloops only) Upper bound.
2944   KmpTaskTUpperBound,
2945   /// (Taskloops only) Stride.
2946   KmpTaskTStride,
2947   /// (Taskloops only) Is last iteration flag.
2948   KmpTaskTLastIter,
2949   /// (Taskloops only) Reduction data.
2950   KmpTaskTReductions,
2951 };
2952 } // anonymous namespace
2953 
2954 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2955   return OffloadEntriesTargetRegion.empty() &&
2956          OffloadEntriesDeviceGlobalVar.empty();
2957 }
2958 
2959 /// Initialize target region entry.
2960 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2961     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2962                                     StringRef ParentName, unsigned LineNum,
2963                                     unsigned Order) {
2964   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2965                                              "only required for the device "
2966                                              "code generation.");
2967   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2968       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2969                                    OMPTargetRegionEntryTargetRegion);
2970   ++OffloadingEntriesNum;
2971 }
2972 
2973 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2974     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2975                                   StringRef ParentName, unsigned LineNum,
2976                                   llvm::Constant *Addr, llvm::Constant *ID,
2977                                   OMPTargetRegionEntryKind Flags) {
2978   // If we are emitting code for a target, the entry is already initialized,
2979   // only has to be registered.
2980   if (CGM.getLangOpts().OpenMPIsDevice) {
2981     // This could happen if the device compilation is invoked standalone.
2982     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
2983       return;
2984     auto &Entry =
2985         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
2986     Entry.setAddress(Addr);
2987     Entry.setID(ID);
2988     Entry.setFlags(Flags);
2989   } else {
2990     if (Flags ==
2991             OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
2992         hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
2993                                  /*IgnoreAddressId*/ true))
2994       return;
2995     assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
2996            "Target region entry already registered!");
2997     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
2998     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
2999     ++OffloadingEntriesNum;
3000   }
3001 }
3002 
3003 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3004     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3005     bool IgnoreAddressId) const {
3006   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3007   if (PerDevice == OffloadEntriesTargetRegion.end())
3008     return false;
3009   auto PerFile = PerDevice->second.find(FileID);
3010   if (PerFile == PerDevice->second.end())
3011     return false;
3012   auto PerParentName = PerFile->second.find(ParentName);
3013   if (PerParentName == PerFile->second.end())
3014     return false;
3015   auto PerLine = PerParentName->second.find(LineNum);
3016   if (PerLine == PerParentName->second.end())
3017     return false;
3018   // Fail if this entry is already registered.
3019   if (!IgnoreAddressId &&
3020       (PerLine->second.getAddress() || PerLine->second.getID()))
3021     return false;
3022   return true;
3023 }
3024 
3025 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3026     const OffloadTargetRegionEntryInfoActTy &Action) {
3027   // Scan all target region entries and perform the provided action.
3028   for (const auto &D : OffloadEntriesTargetRegion)
3029     for (const auto &F : D.second)
3030       for (const auto &P : F.second)
3031         for (const auto &L : P.second)
3032           Action(D.first, F.first, P.first(), L.first, L.second);
3033 }
3034 
3035 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3036     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3037                                        OMPTargetGlobalVarEntryKind Flags,
3038                                        unsigned Order) {
3039   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3040                                              "only required for the device "
3041                                              "code generation.");
3042   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3043   ++OffloadingEntriesNum;
3044 }
3045 
3046 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3047     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3048                                      CharUnits VarSize,
3049                                      OMPTargetGlobalVarEntryKind Flags,
3050                                      llvm::GlobalValue::LinkageTypes Linkage) {
3051   if (CGM.getLangOpts().OpenMPIsDevice) {
3052     // This could happen if the device compilation is invoked standalone.
3053     if (!hasDeviceGlobalVarEntryInfo(VarName))
3054       return;
3055     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3056     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3057       if (Entry.getVarSize().isZero()) {
3058         Entry.setVarSize(VarSize);
3059         Entry.setLinkage(Linkage);
3060       }
3061       return;
3062     }
3063     Entry.setVarSize(VarSize);
3064     Entry.setLinkage(Linkage);
3065     Entry.setAddress(Addr);
3066   } else {
3067     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3068       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3069       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3070              "Entry not initialized!");
3071       if (Entry.getVarSize().isZero()) {
3072         Entry.setVarSize(VarSize);
3073         Entry.setLinkage(Linkage);
3074       }
3075       return;
3076     }
3077     OffloadEntriesDeviceGlobalVar.try_emplace(
3078         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3079     ++OffloadingEntriesNum;
3080   }
3081 }
3082 
3083 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3084     actOnDeviceGlobalVarEntriesInfo(
3085         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3086   // Scan all target region entries and perform the provided action.
3087   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3088     Action(E.getKey(), E.getValue());
3089 }
3090 
3091 void CGOpenMPRuntime::createOffloadEntry(
3092     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3093     llvm::GlobalValue::LinkageTypes Linkage) {
3094   OMPBuilder.emitOffloadingEntry(ID, Addr->getName(), Size, Flags);
3095 }
3096 
3097 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3098   // Emit the offloading entries and metadata so that the device codegen side
3099   // can easily figure out what to emit. The produced metadata looks like
3100   // this:
3101   //
3102   // !omp_offload.info = !{!1, ...}
3103   //
3104   // Right now we only generate metadata for function that contain target
3105   // regions.
3106 
3107   // If we are in simd mode or there are no entries, we don't need to do
3108   // anything.
3109   if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
3110     return;
3111 
3112   llvm::Module &M = CGM.getModule();
3113   llvm::LLVMContext &C = M.getContext();
3114   SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
3115                          SourceLocation, StringRef>,
3116               16>
3117       OrderedEntries(OffloadEntriesInfoManager.size());
3118   llvm::SmallVector<StringRef, 16> ParentFunctions(
3119       OffloadEntriesInfoManager.size());
3120 
3121   // Auxiliary methods to create metadata values and strings.
3122   auto &&GetMDInt = [this](unsigned V) {
3123     return llvm::ConstantAsMetadata::get(
3124         llvm::ConstantInt::get(CGM.Int32Ty, V));
3125   };
3126 
3127   auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3128 
3129   // Create the offloading info metadata node.
3130   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3131 
3132   // Create function that emits metadata for each target region entry;
3133   auto &&TargetRegionMetadataEmitter =
3134       [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
3135        &GetMDString](
3136           unsigned DeviceID, unsigned FileID, StringRef ParentName,
3137           unsigned Line,
3138           const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
3139         // Generate metadata for target regions. Each entry of this metadata
3140         // contains:
3141         // - Entry 0 -> Kind of this type of metadata (0).
3142         // - Entry 1 -> Device ID of the file where the entry was identified.
3143         // - Entry 2 -> File ID of the file where the entry was identified.
3144         // - Entry 3 -> Mangled name of the function where the entry was
3145         // identified.
3146         // - Entry 4 -> Line in the file where the entry was identified.
3147         // - Entry 5 -> Order the entry was created.
3148         // The first element of the metadata node is the kind.
3149         llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
3150                                  GetMDInt(FileID),      GetMDString(ParentName),
3151                                  GetMDInt(Line),        GetMDInt(E.getOrder())};
3152 
3153         SourceLocation Loc;
3154         for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
3155                   E = CGM.getContext().getSourceManager().fileinfo_end();
3156              I != E; ++I) {
3157           if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
3158               I->getFirst()->getUniqueID().getFile() == FileID) {
3159             Loc = CGM.getContext().getSourceManager().translateFileLineCol(
3160                 I->getFirst(), Line, 1);
3161             break;
3162           }
3163         }
3164         // Save this entry in the right position of the ordered entries array.
3165         OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
3166         ParentFunctions[E.getOrder()] = ParentName;
3167 
3168         // Add metadata to the named metadata node.
3169         MD->addOperand(llvm::MDNode::get(C, Ops));
3170       };
3171 
3172   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3173       TargetRegionMetadataEmitter);
3174 
3175   // Create function that emits metadata for each device global variable entry;
3176   auto &&DeviceGlobalVarMetadataEmitter =
3177       [&C, &OrderedEntries, &GetMDInt, &GetMDString,
3178        MD](StringRef MangledName,
3179            const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
3180                &E) {
3181         // Generate metadata for global variables. Each entry of this metadata
3182         // contains:
3183         // - Entry 0 -> Kind of this type of metadata (1).
3184         // - Entry 1 -> Mangled name of the variable.
3185         // - Entry 2 -> Declare target kind.
3186         // - Entry 3 -> Order the entry was created.
3187         // The first element of the metadata node is the kind.
3188         llvm::Metadata *Ops[] = {
3189             GetMDInt(E.getKind()), GetMDString(MangledName),
3190             GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3191 
3192         // Save this entry in the right position of the ordered entries array.
3193         OrderedEntries[E.getOrder()] =
3194             std::make_tuple(&E, SourceLocation(), MangledName);
3195 
3196         // Add metadata to the named metadata node.
3197         MD->addOperand(llvm::MDNode::get(C, Ops));
3198       };
3199 
3200   OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
3201       DeviceGlobalVarMetadataEmitter);
3202 
3203   for (const auto &E : OrderedEntries) {
3204     assert(std::get<0>(E) && "All ordered entries must exist!");
3205     if (const auto *CE =
3206             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3207                 std::get<0>(E))) {
3208       if (!CE->getID() || !CE->getAddress()) {
3209         // Do not blame the entry if the parent funtion is not emitted.
3210         StringRef FnName = ParentFunctions[CE->getOrder()];
3211         if (!CGM.GetGlobalValue(FnName))
3212           continue;
3213         unsigned DiagID = CGM.getDiags().getCustomDiagID(
3214             DiagnosticsEngine::Error,
3215             "Offloading entry for target region in %0 is incorrect: either the "
3216             "address or the ID is invalid.");
3217         CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
3218         continue;
3219       }
3220       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3221                          CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3222     } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
3223                                              OffloadEntryInfoDeviceGlobalVar>(
3224                    std::get<0>(E))) {
3225       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
3226           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3227               CE->getFlags());
3228       switch (Flags) {
3229       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
3230         if (CGM.getLangOpts().OpenMPIsDevice &&
3231             CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
3232           continue;
3233         if (!CE->getAddress()) {
3234           unsigned DiagID = CGM.getDiags().getCustomDiagID(
3235               DiagnosticsEngine::Error, "Offloading entry for declare target "
3236                                         "variable %0 is incorrect: the "
3237                                         "address is invalid.");
3238           CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
3239           continue;
3240         }
3241         // The vaiable has no definition - no need to add the entry.
3242         if (CE->getVarSize().isZero())
3243           continue;
3244         break;
3245       }
3246       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
3247         assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
3248                 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
3249                "Declaret target link address is set.");
3250         if (CGM.getLangOpts().OpenMPIsDevice)
3251           continue;
3252         if (!CE->getAddress()) {
3253           unsigned DiagID = CGM.getDiags().getCustomDiagID(
3254               DiagnosticsEngine::Error,
3255               "Offloading entry for declare target variable is incorrect: the "
3256               "address is invalid.");
3257           CGM.getDiags().Report(DiagID);
3258           continue;
3259         }
3260         break;
3261       }
3262 
3263       // Hidden or internal symbols on the device are not externally visible. We
3264       // should not attempt to register them by creating an offloading entry.
3265       if (auto *GV = dyn_cast<llvm::GlobalValue>(CE->getAddress()))
3266         if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
3267           continue;
3268 
3269       createOffloadEntry(CE->getAddress(), CE->getAddress(),
3270                          CE->getVarSize().getQuantity(), Flags,
3271                          CE->getLinkage());
3272     } else {
3273       llvm_unreachable("Unsupported entry kind.");
3274     }
3275   }
3276 }
3277 
3278 /// Loads all the offload entries information from the host IR
3279 /// metadata.
3280 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3281   // If we are in target mode, load the metadata from the host IR. This code has
3282   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
3283 
3284   if (!CGM.getLangOpts().OpenMPIsDevice)
3285     return;
3286 
3287   if (CGM.getLangOpts().OMPHostIRFile.empty())
3288     return;
3289 
3290   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3291   if (auto EC = Buf.getError()) {
3292     CGM.getDiags().Report(diag::err_cannot_open_file)
3293         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3294     return;
3295   }
3296 
3297   llvm::LLVMContext C;
3298   auto ME = expectedToErrorOrAndEmitErrors(
3299       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3300 
3301   if (auto EC = ME.getError()) {
3302     unsigned DiagID = CGM.getDiags().getCustomDiagID(
3303         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3304     CGM.getDiags().Report(DiagID)
3305         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3306     return;
3307   }
3308 
3309   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3310   if (!MD)
3311     return;
3312 
3313   for (llvm::MDNode *MN : MD->operands()) {
3314     auto &&GetMDInt = [MN](unsigned Idx) {
3315       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3316       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3317     };
3318 
3319     auto &&GetMDString = [MN](unsigned Idx) {
3320       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3321       return V->getString();
3322     };
3323 
3324     switch (GetMDInt(0)) {
3325     default:
3326       llvm_unreachable("Unexpected metadata!");
3327       break;
3328     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3329         OffloadingEntryInfoTargetRegion:
3330       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3331           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3332           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3333           /*Order=*/GetMDInt(5));
3334       break;
3335     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3336         OffloadingEntryInfoDeviceGlobalVar:
3337       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3338           /*MangledName=*/GetMDString(1),
3339           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3340               /*Flags=*/GetMDInt(2)),
3341           /*Order=*/GetMDInt(3));
3342       break;
3343     }
3344   }
3345 }
3346 
3347 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3348   if (!KmpRoutineEntryPtrTy) {
3349     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3350     ASTContext &C = CGM.getContext();
3351     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3352     FunctionProtoType::ExtProtoInfo EPI;
3353     KmpRoutineEntryPtrQTy = C.getPointerType(
3354         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3355     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3356   }
3357 }
3358 
3359 namespace {
3360 struct PrivateHelpersTy {
3361   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3362                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3363       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3364         PrivateElemInit(PrivateElemInit) {}
3365   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3366   const Expr *OriginalRef = nullptr;
3367   const VarDecl *Original = nullptr;
3368   const VarDecl *PrivateCopy = nullptr;
3369   const VarDecl *PrivateElemInit = nullptr;
3370   bool isLocalPrivate() const {
3371     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3372   }
3373 };
3374 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3375 } // anonymous namespace
3376 
3377 static bool isAllocatableDecl(const VarDecl *VD) {
3378   const VarDecl *CVD = VD->getCanonicalDecl();
3379   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3380     return false;
3381   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3382   // Use the default allocation.
3383   return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
3384            !AA->getAllocator());
3385 }
3386 
3387 static RecordDecl *
3388 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3389   if (!Privates.empty()) {
3390     ASTContext &C = CGM.getContext();
3391     // Build struct .kmp_privates_t. {
3392     //         /*  private vars  */
3393     //       };
3394     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3395     RD->startDefinition();
3396     for (const auto &Pair : Privates) {
3397       const VarDecl *VD = Pair.second.Original;
3398       QualType Type = VD->getType().getNonReferenceType();
3399       // If the private variable is a local variable with lvalue ref type,
3400       // allocate the pointer instead of the pointee type.
3401       if (Pair.second.isLocalPrivate()) {
3402         if (VD->getType()->isLValueReferenceType())
3403           Type = C.getPointerType(Type);
3404         if (isAllocatableDecl(VD))
3405           Type = C.getPointerType(Type);
3406       }
3407       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3408       if (VD->hasAttrs()) {
3409         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3410              E(VD->getAttrs().end());
3411              I != E; ++I)
3412           FD->addAttr(*I);
3413       }
3414     }
3415     RD->completeDefinition();
3416     return RD;
3417   }
3418   return nullptr;
3419 }
3420 
3421 static RecordDecl *
3422 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3423                          QualType KmpInt32Ty,
3424                          QualType KmpRoutineEntryPointerQTy) {
3425   ASTContext &C = CGM.getContext();
3426   // Build struct kmp_task_t {
3427   //         void *              shareds;
3428   //         kmp_routine_entry_t routine;
3429   //         kmp_int32           part_id;
3430   //         kmp_cmplrdata_t data1;
3431   //         kmp_cmplrdata_t data2;
3432   // For taskloops additional fields:
3433   //         kmp_uint64          lb;
3434   //         kmp_uint64          ub;
3435   //         kmp_int64           st;
3436   //         kmp_int32           liter;
3437   //         void *              reductions;
3438   //       };
3439   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3440   UD->startDefinition();
3441   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3442   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3443   UD->completeDefinition();
3444   QualType KmpCmplrdataTy = C.getRecordType(UD);
3445   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3446   RD->startDefinition();
3447   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3448   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3449   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3450   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3451   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3452   if (isOpenMPTaskLoopDirective(Kind)) {
3453     QualType KmpUInt64Ty =
3454         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3455     QualType KmpInt64Ty =
3456         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3457     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3458     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3459     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3460     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3461     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3462   }
3463   RD->completeDefinition();
3464   return RD;
3465 }
3466 
3467 static RecordDecl *
3468 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3469                                      ArrayRef<PrivateDataTy> Privates) {
3470   ASTContext &C = CGM.getContext();
3471   // Build struct kmp_task_t_with_privates {
3472   //         kmp_task_t task_data;
3473   //         .kmp_privates_t. privates;
3474   //       };
3475   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3476   RD->startDefinition();
3477   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3478   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3479     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3480   RD->completeDefinition();
3481   return RD;
3482 }
3483 
3484 /// Emit a proxy function which accepts kmp_task_t as the second
3485 /// argument.
3486 /// \code
3487 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3488 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3489 ///   For taskloops:
3490 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3491 ///   tt->reductions, tt->shareds);
3492 ///   return 0;
3493 /// }
3494 /// \endcode
3495 static llvm::Function *
3496 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3497                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3498                       QualType KmpTaskTWithPrivatesPtrQTy,
3499                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3500                       QualType SharedsPtrTy, llvm::Function *TaskFunction,
3501                       llvm::Value *TaskPrivatesMap) {
3502   ASTContext &C = CGM.getContext();
3503   FunctionArgList Args;
3504   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3505                             ImplicitParamDecl::Other);
3506   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3507                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3508                                 ImplicitParamDecl::Other);
3509   Args.push_back(&GtidArg);
3510   Args.push_back(&TaskTypeArg);
3511   const auto &TaskEntryFnInfo =
3512       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3513   llvm::FunctionType *TaskEntryTy =
3514       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3515   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3516   auto *TaskEntry = llvm::Function::Create(
3517       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3518   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3519   TaskEntry->setDoesNotRecurse();
3520   CodeGenFunction CGF(CGM);
3521   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3522                     Loc, Loc);
3523 
3524   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3525   // tt,
3526   // For taskloops:
3527   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3528   // tt->task_data.shareds);
3529   llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3530       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3531   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3532       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3533       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3534   const auto *KmpTaskTWithPrivatesQTyRD =
3535       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3536   LValue Base =
3537       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3538   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3539   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3540   LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3541   llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3542 
3543   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3544   LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3545   llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3546       CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3547       CGF.ConvertTypeForMem(SharedsPtrTy));
3548 
3549   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3550   llvm::Value *PrivatesParam;
3551   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3552     LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3553     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3554         PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3555   } else {
3556     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3557   }
3558 
3559   llvm::Value *CommonArgs[] = {
3560       GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3561       CGF.Builder
3562           .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
3563                                                CGF.VoidPtrTy, CGF.Int8Ty)
3564           .getPointer()};
3565   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3566                                           std::end(CommonArgs));
3567   if (isOpenMPTaskLoopDirective(Kind)) {
3568     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3569     LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3570     llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3571     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3572     LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3573     llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3574     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3575     LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3576     llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3577     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3578     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3579     llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3580     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3581     LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3582     llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3583     CallArgs.push_back(LBParam);
3584     CallArgs.push_back(UBParam);
3585     CallArgs.push_back(StParam);
3586     CallArgs.push_back(LIParam);
3587     CallArgs.push_back(RParam);
3588   }
3589   CallArgs.push_back(SharedsParam);
3590 
3591   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3592                                                   CallArgs);
3593   CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3594                              CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3595   CGF.FinishFunction();
3596   return TaskEntry;
3597 }
3598 
3599 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3600                                             SourceLocation Loc,
3601                                             QualType KmpInt32Ty,
3602                                             QualType KmpTaskTWithPrivatesPtrQTy,
3603                                             QualType KmpTaskTWithPrivatesQTy) {
3604   ASTContext &C = CGM.getContext();
3605   FunctionArgList Args;
3606   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3607                             ImplicitParamDecl::Other);
3608   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3609                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3610                                 ImplicitParamDecl::Other);
3611   Args.push_back(&GtidArg);
3612   Args.push_back(&TaskTypeArg);
3613   const auto &DestructorFnInfo =
3614       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3615   llvm::FunctionType *DestructorFnTy =
3616       CGM.getTypes().GetFunctionType(DestructorFnInfo);
3617   std::string Name =
3618       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3619   auto *DestructorFn =
3620       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3621                              Name, &CGM.getModule());
3622   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3623                                     DestructorFnInfo);
3624   DestructorFn->setDoesNotRecurse();
3625   CodeGenFunction CGF(CGM);
3626   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3627                     Args, Loc, Loc);
3628 
3629   LValue Base = CGF.EmitLoadOfPointerLValue(
3630       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3631       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3632   const auto *KmpTaskTWithPrivatesQTyRD =
3633       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3634   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3635   Base = CGF.EmitLValueForField(Base, *FI);
3636   for (const auto *Field :
3637        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3638     if (QualType::DestructionKind DtorKind =
3639             Field->getType().isDestructedType()) {
3640       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3641       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3642     }
3643   }
3644   CGF.FinishFunction();
3645   return DestructorFn;
3646 }
3647 
3648 /// Emit a privates mapping function for correct handling of private and
3649 /// firstprivate variables.
3650 /// \code
3651 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3652 /// **noalias priv1,...,  <tyn> **noalias privn) {
3653 ///   *priv1 = &.privates.priv1;
3654 ///   ...;
3655 ///   *privn = &.privates.privn;
3656 /// }
3657 /// \endcode
3658 static llvm::Value *
3659 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3660                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3661                                ArrayRef<PrivateDataTy> Privates) {
3662   ASTContext &C = CGM.getContext();
3663   FunctionArgList Args;
3664   ImplicitParamDecl TaskPrivatesArg(
3665       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3666       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3667       ImplicitParamDecl::Other);
3668   Args.push_back(&TaskPrivatesArg);
3669   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3670   unsigned Counter = 1;
3671   for (const Expr *E : Data.PrivateVars) {
3672     Args.push_back(ImplicitParamDecl::Create(
3673         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3674         C.getPointerType(C.getPointerType(E->getType()))
3675             .withConst()
3676             .withRestrict(),
3677         ImplicitParamDecl::Other));
3678     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3679     PrivateVarsPos[VD] = Counter;
3680     ++Counter;
3681   }
3682   for (const Expr *E : Data.FirstprivateVars) {
3683     Args.push_back(ImplicitParamDecl::Create(
3684         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3685         C.getPointerType(C.getPointerType(E->getType()))
3686             .withConst()
3687             .withRestrict(),
3688         ImplicitParamDecl::Other));
3689     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3690     PrivateVarsPos[VD] = Counter;
3691     ++Counter;
3692   }
3693   for (const Expr *E : Data.LastprivateVars) {
3694     Args.push_back(ImplicitParamDecl::Create(
3695         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3696         C.getPointerType(C.getPointerType(E->getType()))
3697             .withConst()
3698             .withRestrict(),
3699         ImplicitParamDecl::Other));
3700     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3701     PrivateVarsPos[VD] = Counter;
3702     ++Counter;
3703   }
3704   for (const VarDecl *VD : Data.PrivateLocals) {
3705     QualType Ty = VD->getType().getNonReferenceType();
3706     if (VD->getType()->isLValueReferenceType())
3707       Ty = C.getPointerType(Ty);
3708     if (isAllocatableDecl(VD))
3709       Ty = C.getPointerType(Ty);
3710     Args.push_back(ImplicitParamDecl::Create(
3711         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3712         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3713         ImplicitParamDecl::Other));
3714     PrivateVarsPos[VD] = Counter;
3715     ++Counter;
3716   }
3717   const auto &TaskPrivatesMapFnInfo =
3718       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3719   llvm::FunctionType *TaskPrivatesMapTy =
3720       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3721   std::string Name =
3722       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3723   auto *TaskPrivatesMap = llvm::Function::Create(
3724       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3725       &CGM.getModule());
3726   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3727                                     TaskPrivatesMapFnInfo);
3728   if (CGM.getLangOpts().Optimize) {
3729     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3730     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3731     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3732   }
3733   CodeGenFunction CGF(CGM);
3734   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3735                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3736 
3737   // *privi = &.privates.privi;
3738   LValue Base = CGF.EmitLoadOfPointerLValue(
3739       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3740       TaskPrivatesArg.getType()->castAs<PointerType>());
3741   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3742   Counter = 0;
3743   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3744     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3745     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3746     LValue RefLVal =
3747         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3748     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3749         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3750     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3751     ++Counter;
3752   }
3753   CGF.FinishFunction();
3754   return TaskPrivatesMap;
3755 }
3756 
3757 /// Emit initialization for private variables in task-based directives.
3758 static void emitPrivatesInit(CodeGenFunction &CGF,
3759                              const OMPExecutableDirective &D,
3760                              Address KmpTaskSharedsPtr, LValue TDBase,
3761                              const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3762                              QualType SharedsTy, QualType SharedsPtrTy,
3763                              const OMPTaskDataTy &Data,
3764                              ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3765   ASTContext &C = CGF.getContext();
3766   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3767   LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3768   OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3769                                  ? OMPD_taskloop
3770                                  : OMPD_task;
3771   const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3772   CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3773   LValue SrcBase;
3774   bool IsTargetTask =
3775       isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3776       isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3777   // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3778   // PointersArray, SizesArray, and MappersArray. The original variables for
3779   // these arrays are not captured and we get their addresses explicitly.
3780   if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3781       (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3782     SrcBase = CGF.MakeAddrLValue(
3783         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3784             KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
3785             CGF.ConvertTypeForMem(SharedsTy)),
3786         SharedsTy);
3787   }
3788   FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3789   for (const PrivateDataTy &Pair : Privates) {
3790     // Do not initialize private locals.
3791     if (Pair.second.isLocalPrivate()) {
3792       ++FI;
3793       continue;
3794     }
3795     const VarDecl *VD = Pair.second.PrivateCopy;
3796     const Expr *Init = VD->getAnyInitializer();
3797     if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3798                              !CGF.isTrivialInitializer(Init)))) {
3799       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3800       if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3801         const VarDecl *OriginalVD = Pair.second.Original;
3802         // Check if the variable is the target-based BasePointersArray,
3803         // PointersArray, SizesArray, or MappersArray.
3804         LValue SharedRefLValue;
3805         QualType Type = PrivateLValue.getType();
3806         const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3807         if (IsTargetTask && !SharedField) {
3808           assert(isa<ImplicitParamDecl>(OriginalVD) &&
3809                  isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3810                  cast<CapturedDecl>(OriginalVD->getDeclContext())
3811                          ->getNumParams() == 0 &&
3812                  isa<TranslationUnitDecl>(
3813                      cast<CapturedDecl>(OriginalVD->getDeclContext())
3814                          ->getDeclContext()) &&
3815                  "Expected artificial target data variable.");
3816           SharedRefLValue =
3817               CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3818         } else if (ForDup) {
3819           SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3820           SharedRefLValue = CGF.MakeAddrLValue(
3821               SharedRefLValue.getAddress(CGF).withAlignment(
3822                   C.getDeclAlign(OriginalVD)),
3823               SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3824               SharedRefLValue.getTBAAInfo());
3825         } else if (CGF.LambdaCaptureFields.count(
3826                        Pair.second.Original->getCanonicalDecl()) > 0 ||
3827                    isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3828           SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3829         } else {
3830           // Processing for implicitly captured variables.
3831           InlinedOpenMPRegionRAII Region(
3832               CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3833               /*HasCancel=*/false, /*NoInheritance=*/true);
3834           SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3835         }
3836         if (Type->isArrayType()) {
3837           // Initialize firstprivate array.
3838           if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3839             // Perform simple memcpy.
3840             CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3841           } else {
3842             // Initialize firstprivate array using element-by-element
3843             // initialization.
3844             CGF.EmitOMPAggregateAssign(
3845                 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
3846                 Type,
3847                 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3848                                                   Address SrcElement) {
3849                   // Clean up any temporaries needed by the initialization.
3850                   CodeGenFunction::OMPPrivateScope InitScope(CGF);
3851                   InitScope.addPrivate(Elem, SrcElement);
3852                   (void)InitScope.Privatize();
3853                   // Emit initialization for single element.
3854                   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3855                       CGF, &CapturesInfo);
3856                   CGF.EmitAnyExprToMem(Init, DestElement,
3857                                        Init->getType().getQualifiers(),
3858                                        /*IsInitializer=*/false);
3859                 });
3860           }
3861         } else {
3862           CodeGenFunction::OMPPrivateScope InitScope(CGF);
3863           InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
3864           (void)InitScope.Privatize();
3865           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3866           CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3867                              /*capturedByInit=*/false);
3868         }
3869       } else {
3870         CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3871       }
3872     }
3873     ++FI;
3874   }
3875 }
3876 
3877 /// Check if duplication function is required for taskloops.
3878 static bool checkInitIsRequired(CodeGenFunction &CGF,
3879                                 ArrayRef<PrivateDataTy> Privates) {
3880   bool InitRequired = false;
3881   for (const PrivateDataTy &Pair : Privates) {
3882     if (Pair.second.isLocalPrivate())
3883       continue;
3884     const VarDecl *VD = Pair.second.PrivateCopy;
3885     const Expr *Init = VD->getAnyInitializer();
3886     InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3887                                     !CGF.isTrivialInitializer(Init));
3888     if (InitRequired)
3889       break;
3890   }
3891   return InitRequired;
3892 }
3893 
3894 
3895 /// Emit task_dup function (for initialization of
3896 /// private/firstprivate/lastprivate vars and last_iter flag)
3897 /// \code
3898 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3899 /// lastpriv) {
3900 /// // setup lastprivate flag
3901 ///    task_dst->last = lastpriv;
3902 /// // could be constructor calls here...
3903 /// }
3904 /// \endcode
3905 static llvm::Value *
3906 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3907                     const OMPExecutableDirective &D,
3908                     QualType KmpTaskTWithPrivatesPtrQTy,
3909                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3910                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3911                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3912                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3913   ASTContext &C = CGM.getContext();
3914   FunctionArgList Args;
3915   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3916                            KmpTaskTWithPrivatesPtrQTy,
3917                            ImplicitParamDecl::Other);
3918   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3919                            KmpTaskTWithPrivatesPtrQTy,
3920                            ImplicitParamDecl::Other);
3921   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3922                                 ImplicitParamDecl::Other);
3923   Args.push_back(&DstArg);
3924   Args.push_back(&SrcArg);
3925   Args.push_back(&LastprivArg);
3926   const auto &TaskDupFnInfo =
3927       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3928   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3929   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3930   auto *TaskDup = llvm::Function::Create(
3931       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3932   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3933   TaskDup->setDoesNotRecurse();
3934   CodeGenFunction CGF(CGM);
3935   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3936                     Loc);
3937 
3938   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3939       CGF.GetAddrOfLocalVar(&DstArg),
3940       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3941   // task_dst->liter = lastpriv;
3942   if (WithLastIter) {
3943     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3944     LValue Base = CGF.EmitLValueForField(
3945         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3946     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3947     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3948         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3949     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3950   }
3951 
3952   // Emit initial values for private copies (if any).
3953   assert(!Privates.empty());
3954   Address KmpTaskSharedsPtr = Address::invalid();
3955   if (!Data.FirstprivateVars.empty()) {
3956     LValue TDBase = CGF.EmitLoadOfPointerLValue(
3957         CGF.GetAddrOfLocalVar(&SrcArg),
3958         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3959     LValue Base = CGF.EmitLValueForField(
3960         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3961     KmpTaskSharedsPtr = Address(
3962         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3963                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
3964                                                   KmpTaskTShareds)),
3965                              Loc),
3966         CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3967   }
3968   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3969                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3970   CGF.FinishFunction();
3971   return TaskDup;
3972 }
3973 
3974 /// Checks if destructor function is required to be generated.
3975 /// \return true if cleanups are required, false otherwise.
3976 static bool
3977 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3978                          ArrayRef<PrivateDataTy> Privates) {
3979   for (const PrivateDataTy &P : Privates) {
3980     if (P.second.isLocalPrivate())
3981       continue;
3982     QualType Ty = P.second.Original->getType().getNonReferenceType();
3983     if (Ty.isDestructedType())
3984       return true;
3985   }
3986   return false;
3987 }
3988 
3989 namespace {
3990 /// Loop generator for OpenMP iterator expression.
3991 class OMPIteratorGeneratorScope final
3992     : public CodeGenFunction::OMPPrivateScope {
3993   CodeGenFunction &CGF;
3994   const OMPIteratorExpr *E = nullptr;
3995   SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
3996   SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
3997   OMPIteratorGeneratorScope() = delete;
3998   OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3999 
4000 public:
4001   OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
4002       : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
4003     if (!E)
4004       return;
4005     SmallVector<llvm::Value *, 4> Uppers;
4006     for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
4007       Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
4008       const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
4009       addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
4010       const OMPIteratorHelperData &HelperData = E->getHelper(I);
4011       addPrivate(
4012           HelperData.CounterVD,
4013           CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
4014     }
4015     Privatize();
4016 
4017     for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
4018       const OMPIteratorHelperData &HelperData = E->getHelper(I);
4019       LValue CLVal =
4020           CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
4021                              HelperData.CounterVD->getType());
4022       // Counter = 0;
4023       CGF.EmitStoreOfScalar(
4024           llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
4025           CLVal);
4026       CodeGenFunction::JumpDest &ContDest =
4027           ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
4028       CodeGenFunction::JumpDest &ExitDest =
4029           ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
4030       // N = <number-of_iterations>;
4031       llvm::Value *N = Uppers[I];
4032       // cont:
4033       // if (Counter < N) goto body; else goto exit;
4034       CGF.EmitBlock(ContDest.getBlock());
4035       auto *CVal =
4036           CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
4037       llvm::Value *Cmp =
4038           HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
4039               ? CGF.Builder.CreateICmpSLT(CVal, N)
4040               : CGF.Builder.CreateICmpULT(CVal, N);
4041       llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
4042       CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
4043       // body:
4044       CGF.EmitBlock(BodyBB);
4045       // Iteri = Begini + Counter * Stepi;
4046       CGF.EmitIgnoredExpr(HelperData.Update);
4047     }
4048   }
4049   ~OMPIteratorGeneratorScope() {
4050     if (!E)
4051       return;
4052     for (unsigned I = E->numOfIterators(); I > 0; --I) {
4053       // Counter = Counter + 1;
4054       const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
4055       CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
4056       // goto cont;
4057       CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
4058       // exit:
4059       CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
4060     }
4061   }
4062 };
4063 } // namespace
4064 
4065 static std::pair<llvm::Value *, llvm::Value *>
4066 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4067   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4068   llvm::Value *Addr;
4069   if (OASE) {
4070     const Expr *Base = OASE->getBase();
4071     Addr = CGF.EmitScalarExpr(Base);
4072   } else {
4073     Addr = CGF.EmitLValue(E).getPointer(CGF);
4074   }
4075   llvm::Value *SizeVal;
4076   QualType Ty = E->getType();
4077   if (OASE) {
4078     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4079     for (const Expr *SE : OASE->getDimensions()) {
4080       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4081       Sz = CGF.EmitScalarConversion(
4082           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4083       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4084     }
4085   } else if (const auto *ASE =
4086                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4087     LValue UpAddrLVal =
4088         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4089     Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
4090     llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
4091         UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
4092     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4093     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4094     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4095   } else {
4096     SizeVal = CGF.getTypeSize(Ty);
4097   }
4098   return std::make_pair(Addr, SizeVal);
4099 }
4100 
4101 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4102 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4103   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4104   if (KmpTaskAffinityInfoTy.isNull()) {
4105     RecordDecl *KmpAffinityInfoRD =
4106         C.buildImplicitRecord("kmp_task_affinity_info_t");
4107     KmpAffinityInfoRD->startDefinition();
4108     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4109     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4110     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4111     KmpAffinityInfoRD->completeDefinition();
4112     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4113   }
4114 }
4115 
4116 CGOpenMPRuntime::TaskResultTy
4117 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4118                               const OMPExecutableDirective &D,
4119                               llvm::Function *TaskFunction, QualType SharedsTy,
4120                               Address Shareds, const OMPTaskDataTy &Data) {
4121   ASTContext &C = CGM.getContext();
4122   llvm::SmallVector<PrivateDataTy, 4> Privates;
4123   // Aggregate privates and sort them by the alignment.
4124   const auto *I = Data.PrivateCopies.begin();
4125   for (const Expr *E : Data.PrivateVars) {
4126     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4127     Privates.emplace_back(
4128         C.getDeclAlign(VD),
4129         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4130                          /*PrivateElemInit=*/nullptr));
4131     ++I;
4132   }
4133   I = Data.FirstprivateCopies.begin();
4134   const auto *IElemInitRef = Data.FirstprivateInits.begin();
4135   for (const Expr *E : Data.FirstprivateVars) {
4136     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4137     Privates.emplace_back(
4138         C.getDeclAlign(VD),
4139         PrivateHelpersTy(
4140             E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4141             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
4142     ++I;
4143     ++IElemInitRef;
4144   }
4145   I = Data.LastprivateCopies.begin();
4146   for (const Expr *E : Data.LastprivateVars) {
4147     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4148     Privates.emplace_back(
4149         C.getDeclAlign(VD),
4150         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4151                          /*PrivateElemInit=*/nullptr));
4152     ++I;
4153   }
4154   for (const VarDecl *VD : Data.PrivateLocals) {
4155     if (isAllocatableDecl(VD))
4156       Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
4157     else
4158       Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
4159   }
4160   llvm::stable_sort(Privates,
4161                     [](const PrivateDataTy &L, const PrivateDataTy &R) {
4162                       return L.first > R.first;
4163                     });
4164   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4165   // Build type kmp_routine_entry_t (if not built yet).
4166   emitKmpRoutineEntryT(KmpInt32Ty);
4167   // Build type kmp_task_t (if not built yet).
4168   if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
4169     if (SavedKmpTaskloopTQTy.isNull()) {
4170       SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4171           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4172     }
4173     KmpTaskTQTy = SavedKmpTaskloopTQTy;
4174   } else {
4175     assert((D.getDirectiveKind() == OMPD_task ||
4176             isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
4177             isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
4178            "Expected taskloop, task or target directive");
4179     if (SavedKmpTaskTQTy.isNull()) {
4180       SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4181           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4182     }
4183     KmpTaskTQTy = SavedKmpTaskTQTy;
4184   }
4185   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4186   // Build particular struct kmp_task_t for the given task.
4187   const RecordDecl *KmpTaskTWithPrivatesQTyRD =
4188       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
4189   QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
4190   QualType KmpTaskTWithPrivatesPtrQTy =
4191       C.getPointerType(KmpTaskTWithPrivatesQTy);
4192   llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
4193   llvm::Type *KmpTaskTWithPrivatesPtrTy =
4194       KmpTaskTWithPrivatesTy->getPointerTo();
4195   llvm::Value *KmpTaskTWithPrivatesTySize =
4196       CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
4197   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
4198 
4199   // Emit initial values for private copies (if any).
4200   llvm::Value *TaskPrivatesMap = nullptr;
4201   llvm::Type *TaskPrivatesMapTy =
4202       std::next(TaskFunction->arg_begin(), 3)->getType();
4203   if (!Privates.empty()) {
4204     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4205     TaskPrivatesMap =
4206         emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
4207     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4208         TaskPrivatesMap, TaskPrivatesMapTy);
4209   } else {
4210     TaskPrivatesMap = llvm::ConstantPointerNull::get(
4211         cast<llvm::PointerType>(TaskPrivatesMapTy));
4212   }
4213   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
4214   // kmp_task_t *tt);
4215   llvm::Function *TaskEntry = emitProxyTaskFunction(
4216       CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4217       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
4218       TaskPrivatesMap);
4219 
4220   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
4221   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
4222   // kmp_routine_entry_t *task_entry);
4223   // Task flags. Format is taken from
4224   // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
4225   // description of kmp_tasking_flags struct.
4226   enum {
4227     TiedFlag = 0x1,
4228     FinalFlag = 0x2,
4229     DestructorsFlag = 0x8,
4230     PriorityFlag = 0x20,
4231     DetachableFlag = 0x40,
4232   };
4233   unsigned Flags = Data.Tied ? TiedFlag : 0;
4234   bool NeedsCleanup = false;
4235   if (!Privates.empty()) {
4236     NeedsCleanup =
4237         checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
4238     if (NeedsCleanup)
4239       Flags = Flags | DestructorsFlag;
4240   }
4241   if (Data.Priority.getInt())
4242     Flags = Flags | PriorityFlag;
4243   if (D.hasClausesOfKind<OMPDetachClause>())
4244     Flags = Flags | DetachableFlag;
4245   llvm::Value *TaskFlags =
4246       Data.Final.getPointer()
4247           ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
4248                                      CGF.Builder.getInt32(FinalFlag),
4249                                      CGF.Builder.getInt32(/*C=*/0))
4250           : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
4251   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
4252   llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
4253   SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
4254       getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
4255       SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4256           TaskEntry, KmpRoutineEntryPtrTy)};
4257   llvm::Value *NewTask;
4258   if (D.hasClausesOfKind<OMPNowaitClause>()) {
4259     // Check if we have any device clause associated with the directive.
4260     const Expr *Device = nullptr;
4261     if (auto *C = D.getSingleClause<OMPDeviceClause>())
4262       Device = C->getDevice();
4263     // Emit device ID if any otherwise use default value.
4264     llvm::Value *DeviceID;
4265     if (Device)
4266       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
4267                                            CGF.Int64Ty, /*isSigned=*/true);
4268     else
4269       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
4270     AllocArgs.push_back(DeviceID);
4271     NewTask = CGF.EmitRuntimeCall(
4272         OMPBuilder.getOrCreateRuntimeFunction(
4273             CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
4274         AllocArgs);
4275   } else {
4276     NewTask =
4277         CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4278                                 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
4279                             AllocArgs);
4280   }
4281   // Emit detach clause initialization.
4282   // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
4283   // task_descriptor);
4284   if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
4285     const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
4286     LValue EvtLVal = CGF.EmitLValue(Evt);
4287 
4288     // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
4289     // int gtid, kmp_task_t *task);
4290     llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
4291     llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
4292     Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
4293     llvm::Value *EvtVal = CGF.EmitRuntimeCall(
4294         OMPBuilder.getOrCreateRuntimeFunction(
4295             CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
4296         {Loc, Tid, NewTask});
4297     EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
4298                                       Evt->getExprLoc());
4299     CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
4300   }
4301   // Process affinity clauses.
4302   if (D.hasClausesOfKind<OMPAffinityClause>()) {
4303     // Process list of affinity data.
4304     ASTContext &C = CGM.getContext();
4305     Address AffinitiesArray = Address::invalid();
4306     // Calculate number of elements to form the array of affinity data.
4307     llvm::Value *NumOfElements = nullptr;
4308     unsigned NumAffinities = 0;
4309     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4310       if (const Expr *Modifier = C->getModifier()) {
4311         const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
4312         for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4313           llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4314           Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4315           NumOfElements =
4316               NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
4317         }
4318       } else {
4319         NumAffinities += C->varlist_size();
4320       }
4321     }
4322     getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
4323     // Fields ids in kmp_task_affinity_info record.
4324     enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
4325 
4326     QualType KmpTaskAffinityInfoArrayTy;
4327     if (NumOfElements) {
4328       NumOfElements = CGF.Builder.CreateNUWAdd(
4329           llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
4330       auto *OVE = new (C) OpaqueValueExpr(
4331           Loc,
4332           C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
4333           VK_PRValue);
4334       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4335                                                     RValue::get(NumOfElements));
4336       KmpTaskAffinityInfoArrayTy =
4337           C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
4338                                  /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4339       // Properly emit variable-sized array.
4340       auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
4341                                            ImplicitParamDecl::Other);
4342       CGF.EmitVarDecl(*PD);
4343       AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
4344       NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4345                                                 /*isSigned=*/false);
4346     } else {
4347       KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
4348           KmpTaskAffinityInfoTy,
4349           llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
4350           ArrayType::Normal, /*IndexTypeQuals=*/0);
4351       AffinitiesArray =
4352           CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
4353       AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
4354       NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
4355                                              /*isSigned=*/false);
4356     }
4357 
4358     const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
4359     // Fill array by elements without iterators.
4360     unsigned Pos = 0;
4361     bool HasIterator = false;
4362     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4363       if (C->getModifier()) {
4364         HasIterator = true;
4365         continue;
4366       }
4367       for (const Expr *E : C->varlists()) {
4368         llvm::Value *Addr;
4369         llvm::Value *Size;
4370         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4371         LValue Base =
4372             CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
4373                                KmpTaskAffinityInfoTy);
4374         // affs[i].base_addr = &<Affinities[i].second>;
4375         LValue BaseAddrLVal = CGF.EmitLValueForField(
4376             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4377         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4378                               BaseAddrLVal);
4379         // affs[i].len = sizeof(<Affinities[i].second>);
4380         LValue LenLVal = CGF.EmitLValueForField(
4381             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4382         CGF.EmitStoreOfScalar(Size, LenLVal);
4383         ++Pos;
4384       }
4385     }
4386     LValue PosLVal;
4387     if (HasIterator) {
4388       PosLVal = CGF.MakeAddrLValue(
4389           CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
4390           C.getSizeType());
4391       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4392     }
4393     // Process elements with iterators.
4394     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4395       const Expr *Modifier = C->getModifier();
4396       if (!Modifier)
4397         continue;
4398       OMPIteratorGeneratorScope IteratorScope(
4399           CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
4400       for (const Expr *E : C->varlists()) {
4401         llvm::Value *Addr;
4402         llvm::Value *Size;
4403         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4404         llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4405         LValue Base = CGF.MakeAddrLValue(
4406             CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
4407         // affs[i].base_addr = &<Affinities[i].second>;
4408         LValue BaseAddrLVal = CGF.EmitLValueForField(
4409             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4410         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4411                               BaseAddrLVal);
4412         // affs[i].len = sizeof(<Affinities[i].second>);
4413         LValue LenLVal = CGF.EmitLValueForField(
4414             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4415         CGF.EmitStoreOfScalar(Size, LenLVal);
4416         Idx = CGF.Builder.CreateNUWAdd(
4417             Idx, llvm::ConstantInt::get(Idx->getType(), 1));
4418         CGF.EmitStoreOfScalar(Idx, PosLVal);
4419       }
4420     }
4421     // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
4422     // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
4423     // naffins, kmp_task_affinity_info_t *affin_list);
4424     llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
4425     llvm::Value *GTid = getThreadID(CGF, Loc);
4426     llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4427         AffinitiesArray.getPointer(), CGM.VoidPtrTy);
4428     // FIXME: Emit the function and ignore its result for now unless the
4429     // runtime function is properly implemented.
4430     (void)CGF.EmitRuntimeCall(
4431         OMPBuilder.getOrCreateRuntimeFunction(
4432             CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
4433         {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
4434   }
4435   llvm::Value *NewTaskNewTaskTTy =
4436       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4437           NewTask, KmpTaskTWithPrivatesPtrTy);
4438   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4439                                                KmpTaskTWithPrivatesQTy);
4440   LValue TDBase =
4441       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4442   // Fill the data in the resulting kmp_task_t record.
4443   // Copy shareds if there are any.
4444   Address KmpTaskSharedsPtr = Address::invalid();
4445   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4446     KmpTaskSharedsPtr = Address(
4447         CGF.EmitLoadOfScalar(
4448             CGF.EmitLValueForField(
4449                 TDBase,
4450                 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
4451             Loc),
4452         CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
4453     LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4454     LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4455     CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4456   }
4457   // Emit initial values for private copies (if any).
4458   TaskResultTy Result;
4459   if (!Privates.empty()) {
4460     emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4461                      SharedsTy, SharedsPtrTy, Data, Privates,
4462                      /*ForDup=*/false);
4463     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4464         (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4465       Result.TaskDupFn = emitTaskDupFunction(
4466           CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4467           KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4468           /*WithLastIter=*/!Data.LastprivateVars.empty());
4469     }
4470   }
4471   // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4472   enum { Priority = 0, Destructors = 1 };
4473   // Provide pointer to function with destructors for privates.
4474   auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4475   const RecordDecl *KmpCmplrdataUD =
4476       (*FI)->getType()->getAsUnionType()->getDecl();
4477   if (NeedsCleanup) {
4478     llvm::Value *DestructorFn = emitDestructorsFunction(
4479         CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4480         KmpTaskTWithPrivatesQTy);
4481     LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4482     LValue DestructorsLV = CGF.EmitLValueForField(
4483         Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4484     CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4485                               DestructorFn, KmpRoutineEntryPtrTy),
4486                           DestructorsLV);
4487   }
4488   // Set priority.
4489   if (Data.Priority.getInt()) {
4490     LValue Data2LV = CGF.EmitLValueForField(
4491         TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4492     LValue PriorityLV = CGF.EmitLValueForField(
4493         Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4494     CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4495   }
4496   Result.NewTask = NewTask;
4497   Result.TaskEntry = TaskEntry;
4498   Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4499   Result.TDBase = TDBase;
4500   Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4501   return Result;
4502 }
4503 
4504 namespace {
4505 /// Dependence kind for RTL.
4506 enum RTLDependenceKindTy {
4507   DepIn = 0x01,
4508   DepInOut = 0x3,
4509   DepMutexInOutSet = 0x4,
4510   DepInOutSet = 0x8,
4511   DepOmpAllMem = 0x80,
4512 };
4513 /// Fields ids in kmp_depend_info record.
4514 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
4515 } // namespace
4516 
4517 /// Translates internal dependency kind into the runtime kind.
4518 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4519   RTLDependenceKindTy DepKind;
4520   switch (K) {
4521   case OMPC_DEPEND_in:
4522     DepKind = DepIn;
4523     break;
4524   // Out and InOut dependencies must use the same code.
4525   case OMPC_DEPEND_out:
4526   case OMPC_DEPEND_inout:
4527     DepKind = DepInOut;
4528     break;
4529   case OMPC_DEPEND_mutexinoutset:
4530     DepKind = DepMutexInOutSet;
4531     break;
4532   case OMPC_DEPEND_inoutset:
4533     DepKind = DepInOutSet;
4534     break;
4535   case OMPC_DEPEND_outallmemory:
4536     DepKind = DepOmpAllMem;
4537     break;
4538   case OMPC_DEPEND_source:
4539   case OMPC_DEPEND_sink:
4540   case OMPC_DEPEND_depobj:
4541   case OMPC_DEPEND_inoutallmemory:
4542   case OMPC_DEPEND_unknown:
4543     llvm_unreachable("Unknown task dependence type");
4544   }
4545   return DepKind;
4546 }
4547 
4548 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4549 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4550                            QualType &FlagsTy) {
4551   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4552   if (KmpDependInfoTy.isNull()) {
4553     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4554     KmpDependInfoRD->startDefinition();
4555     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4556     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4557     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4558     KmpDependInfoRD->completeDefinition();
4559     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4560   }
4561 }
4562 
4563 std::pair<llvm::Value *, LValue>
4564 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4565                                    SourceLocation Loc) {
4566   ASTContext &C = CGM.getContext();
4567   QualType FlagsTy;
4568   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4569   RecordDecl *KmpDependInfoRD =
4570       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4571   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4572   LValue Base = CGF.EmitLoadOfPointerLValue(
4573       CGF.Builder.CreateElementBitCast(
4574           DepobjLVal.getAddress(CGF),
4575           CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4576       KmpDependInfoPtrTy->castAs<PointerType>());
4577   Address DepObjAddr = CGF.Builder.CreateGEP(
4578       Base.getAddress(CGF),
4579       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4580   LValue NumDepsBase = CGF.MakeAddrLValue(
4581       DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4582   // NumDeps = deps[i].base_addr;
4583   LValue BaseAddrLVal = CGF.EmitLValueForField(
4584       NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4585   llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4586   return std::make_pair(NumDeps, Base);
4587 }
4588 
4589 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4590                            llvm::PointerUnion<unsigned *, LValue *> Pos,
4591                            const OMPTaskDataTy::DependData &Data,
4592                            Address DependenciesArray) {
4593   CodeGenModule &CGM = CGF.CGM;
4594   ASTContext &C = CGM.getContext();
4595   QualType FlagsTy;
4596   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4597   RecordDecl *KmpDependInfoRD =
4598       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4599   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4600 
4601   OMPIteratorGeneratorScope IteratorScope(
4602       CGF, cast_or_null<OMPIteratorExpr>(
4603                Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4604                                  : nullptr));
4605   for (const Expr *E : Data.DepExprs) {
4606     llvm::Value *Addr;
4607     llvm::Value *Size;
4608 
4609     // The expression will be a nullptr in the 'omp_all_memory' case.
4610     if (E) {
4611       std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4612       Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4613     } else {
4614       Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4615       Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4616     }
4617     LValue Base;
4618     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4619       Base = CGF.MakeAddrLValue(
4620           CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4621     } else {
4622       assert(E && "Expected a non-null expression");
4623       LValue &PosLVal = *Pos.get<LValue *>();
4624       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4625       Base = CGF.MakeAddrLValue(
4626           CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
4627     }
4628     // deps[i].base_addr = &<Dependencies[i].second>;
4629     LValue BaseAddrLVal = CGF.EmitLValueForField(
4630         Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4631     CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4632     // deps[i].len = sizeof(<Dependencies[i].second>);
4633     LValue LenLVal = CGF.EmitLValueForField(
4634         Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4635     CGF.EmitStoreOfScalar(Size, LenLVal);
4636     // deps[i].flags = <Dependencies[i].first>;
4637     RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4638     LValue FlagsLVal = CGF.EmitLValueForField(
4639         Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4640     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4641                           FlagsLVal);
4642     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4643       ++(*P);
4644     } else {
4645       LValue &PosLVal = *Pos.get<LValue *>();
4646       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4647       Idx = CGF.Builder.CreateNUWAdd(Idx,
4648                                      llvm::ConstantInt::get(Idx->getType(), 1));
4649       CGF.EmitStoreOfScalar(Idx, PosLVal);
4650     }
4651   }
4652 }
4653 
4654 SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
4655     CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4656     const OMPTaskDataTy::DependData &Data) {
4657   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4658          "Expected depobj dependecy kind.");
4659   SmallVector<llvm::Value *, 4> Sizes;
4660   SmallVector<LValue, 4> SizeLVals;
4661   ASTContext &C = CGF.getContext();
4662   {
4663     OMPIteratorGeneratorScope IteratorScope(
4664         CGF, cast_or_null<OMPIteratorExpr>(
4665                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4666                                    : nullptr));
4667     for (const Expr *E : Data.DepExprs) {
4668       llvm::Value *NumDeps;
4669       LValue Base;
4670       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4671       std::tie(NumDeps, Base) =
4672           getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4673       LValue NumLVal = CGF.MakeAddrLValue(
4674           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4675           C.getUIntPtrType());
4676       CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4677                               NumLVal.getAddress(CGF));
4678       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4679       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4680       CGF.EmitStoreOfScalar(Add, NumLVal);
4681       SizeLVals.push_back(NumLVal);
4682     }
4683   }
4684   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4685     llvm::Value *Size =
4686         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4687     Sizes.push_back(Size);
4688   }
4689   return Sizes;
4690 }
4691 
4692 void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
4693                                          QualType &KmpDependInfoTy,
4694                                          LValue PosLVal,
4695                                          const OMPTaskDataTy::DependData &Data,
4696                                          Address DependenciesArray) {
4697   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4698          "Expected depobj dependecy kind.");
4699   llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4700   {
4701     OMPIteratorGeneratorScope IteratorScope(
4702         CGF, cast_or_null<OMPIteratorExpr>(
4703                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4704                                    : nullptr));
4705     for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4706       const Expr *E = Data.DepExprs[I];
4707       llvm::Value *NumDeps;
4708       LValue Base;
4709       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4710       std::tie(NumDeps, Base) =
4711           getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4712 
4713       // memcopy dependency data.
4714       llvm::Value *Size = CGF.Builder.CreateNUWMul(
4715           ElSize,
4716           CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4717       llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4718       Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
4719       CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4720 
4721       // Increase pos.
4722       // pos += size;
4723       llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4724       CGF.EmitStoreOfScalar(Add, PosLVal);
4725     }
4726   }
4727 }
4728 
4729 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4730     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4731     SourceLocation Loc) {
4732   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4733         return D.DepExprs.empty();
4734       }))
4735     return std::make_pair(nullptr, Address::invalid());
4736   // Process list of dependencies.
4737   ASTContext &C = CGM.getContext();
4738   Address DependenciesArray = Address::invalid();
4739   llvm::Value *NumOfElements = nullptr;
4740   unsigned NumDependencies = std::accumulate(
4741       Dependencies.begin(), Dependencies.end(), 0,
4742       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4743         return D.DepKind == OMPC_DEPEND_depobj
4744                    ? V
4745                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4746       });
4747   QualType FlagsTy;
4748   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4749   bool HasDepobjDeps = false;
4750   bool HasRegularWithIterators = false;
4751   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4752   llvm::Value *NumOfRegularWithIterators =
4753       llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4754   // Calculate number of depobj dependecies and regular deps with the iterators.
4755   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4756     if (D.DepKind == OMPC_DEPEND_depobj) {
4757       SmallVector<llvm::Value *, 4> Sizes =
4758           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4759       for (llvm::Value *Size : Sizes) {
4760         NumOfDepobjElements =
4761             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4762       }
4763       HasDepobjDeps = true;
4764       continue;
4765     }
4766     // Include number of iterations, if any.
4767 
4768     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4769       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4770         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4771         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4772         llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4773             Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4774         NumOfRegularWithIterators =
4775             CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4776       }
4777       HasRegularWithIterators = true;
4778       continue;
4779     }
4780   }
4781 
4782   QualType KmpDependInfoArrayTy;
4783   if (HasDepobjDeps || HasRegularWithIterators) {
4784     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4785                                            /*isSigned=*/false);
4786     if (HasDepobjDeps) {
4787       NumOfElements =
4788           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4789     }
4790     if (HasRegularWithIterators) {
4791       NumOfElements =
4792           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4793     }
4794     auto *OVE = new (C) OpaqueValueExpr(
4795         Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4796         VK_PRValue);
4797     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4798                                                   RValue::get(NumOfElements));
4799     KmpDependInfoArrayTy =
4800         C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
4801                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4802     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4803     // Properly emit variable-sized array.
4804     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4805                                          ImplicitParamDecl::Other);
4806     CGF.EmitVarDecl(*PD);
4807     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4808     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4809                                               /*isSigned=*/false);
4810   } else {
4811     KmpDependInfoArrayTy = C.getConstantArrayType(
4812         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4813         ArrayType::Normal, /*IndexTypeQuals=*/0);
4814     DependenciesArray =
4815         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4816     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4817     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4818                                            /*isSigned=*/false);
4819   }
4820   unsigned Pos = 0;
4821   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4822     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4823         Dependencies[I].IteratorExpr)
4824       continue;
4825     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4826                    DependenciesArray);
4827   }
4828   // Copy regular dependecies with iterators.
4829   LValue PosLVal = CGF.MakeAddrLValue(
4830       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4831   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4832   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4833     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4834         !Dependencies[I].IteratorExpr)
4835       continue;
4836     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4837                    DependenciesArray);
4838   }
4839   // Copy final depobj arrays without iterators.
4840   if (HasDepobjDeps) {
4841     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4842       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4843         continue;
4844       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4845                          DependenciesArray);
4846     }
4847   }
4848   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4849       DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4850   return std::make_pair(NumOfElements, DependenciesArray);
4851 }
4852 
4853 Address CGOpenMPRuntime::emitDepobjDependClause(
4854     CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4855     SourceLocation Loc) {
4856   if (Dependencies.DepExprs.empty())
4857     return Address::invalid();
4858   // Process list of dependencies.
4859   ASTContext &C = CGM.getContext();
4860   Address DependenciesArray = Address::invalid();
4861   unsigned NumDependencies = Dependencies.DepExprs.size();
4862   QualType FlagsTy;
4863   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4864   RecordDecl *KmpDependInfoRD =
4865       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4866 
4867   llvm::Value *Size;
4868   // Define type kmp_depend_info[<Dependencies.size()>];
4869   // For depobj reserve one extra element to store the number of elements.
4870   // It is required to handle depobj(x) update(in) construct.
4871   // kmp_depend_info[<Dependencies.size()>] deps;
4872   llvm::Value *NumDepsVal;
4873   CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4874   if (const auto *IE =
4875           cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4876     NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4877     for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4878       llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4879       Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4880       NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4881     }
4882     Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4883                                     NumDepsVal);
4884     CharUnits SizeInBytes =
4885         C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4886     llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4887     Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4888     NumDepsVal =
4889         CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4890   } else {
4891     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4892         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4893         nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
4894     CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4895     Size = CGM.getSize(Sz.alignTo(Align));
4896     NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4897   }
4898   // Need to allocate on the dynamic memory.
4899   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4900   // Use default allocator.
4901   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4902   llvm::Value *Args[] = {ThreadID, Size, Allocator};
4903 
4904   llvm::Value *Addr =
4905       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4906                               CGM.getModule(), OMPRTL___kmpc_alloc),
4907                           Args, ".dep.arr.addr");
4908   llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4909   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4910       Addr, KmpDependInfoLlvmTy->getPointerTo());
4911   DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4912   // Write number of elements in the first element of array for depobj.
4913   LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4914   // deps[i].base_addr = NumDependencies;
4915   LValue BaseAddrLVal = CGF.EmitLValueForField(
4916       Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4917   CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4918   llvm::PointerUnion<unsigned *, LValue *> Pos;
4919   unsigned Idx = 1;
4920   LValue PosLVal;
4921   if (Dependencies.IteratorExpr) {
4922     PosLVal = CGF.MakeAddrLValue(
4923         CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4924         C.getSizeType());
4925     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4926                           /*IsInit=*/true);
4927     Pos = &PosLVal;
4928   } else {
4929     Pos = &Idx;
4930   }
4931   emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4932   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4933       CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4934       CGF.Int8Ty);
4935   return DependenciesArray;
4936 }
4937 
4938 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4939                                         SourceLocation Loc) {
4940   ASTContext &C = CGM.getContext();
4941   QualType FlagsTy;
4942   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4943   LValue Base = CGF.EmitLoadOfPointerLValue(
4944       DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
4945   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4946   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4947       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4948       CGF.ConvertTypeForMem(KmpDependInfoTy));
4949   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4950       Addr.getElementType(), Addr.getPointer(),
4951       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4952   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4953                                                                CGF.VoidPtrTy);
4954   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4955   // Use default allocator.
4956   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4957   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4958 
4959   // _kmpc_free(gtid, addr, nullptr);
4960   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4961                                 CGM.getModule(), OMPRTL___kmpc_free),
4962                             Args);
4963 }
4964 
4965 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4966                                        OpenMPDependClauseKind NewDepKind,
4967                                        SourceLocation Loc) {
4968   ASTContext &C = CGM.getContext();
4969   QualType FlagsTy;
4970   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4971   RecordDecl *KmpDependInfoRD =
4972       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4973   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4974   llvm::Value *NumDeps;
4975   LValue Base;
4976   std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4977 
4978   Address Begin = Base.getAddress(CGF);
4979   // Cast from pointer to array type to pointer to single element.
4980   llvm::Value *End = CGF.Builder.CreateGEP(
4981       Begin.getElementType(), Begin.getPointer(), NumDeps);
4982   // The basic structure here is a while-do loop.
4983   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4984   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4985   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4986   CGF.EmitBlock(BodyBB);
4987   llvm::PHINode *ElementPHI =
4988       CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4989   ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
4990   Begin = Begin.withPointer(ElementPHI);
4991   Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4992                             Base.getTBAAInfo());
4993   // deps[i].flags = NewDepKind;
4994   RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4995   LValue FlagsLVal = CGF.EmitLValueForField(
4996       Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4997   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4998                         FlagsLVal);
4999 
5000   // Shift the address forward by one element.
5001   Address ElementNext =
5002       CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
5003   ElementPHI->addIncoming(ElementNext.getPointer(),
5004                           CGF.Builder.GetInsertBlock());
5005   llvm::Value *IsEmpty =
5006       CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
5007   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5008   // Done.
5009   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5010 }
5011 
5012 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5013                                    const OMPExecutableDirective &D,
5014                                    llvm::Function *TaskFunction,
5015                                    QualType SharedsTy, Address Shareds,
5016                                    const Expr *IfCond,
5017                                    const OMPTaskDataTy &Data) {
5018   if (!CGF.HaveInsertPoint())
5019     return;
5020 
5021   TaskResultTy Result =
5022       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5023   llvm::Value *NewTask = Result.NewTask;
5024   llvm::Function *TaskEntry = Result.TaskEntry;
5025   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5026   LValue TDBase = Result.TDBase;
5027   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5028   // Process list of dependences.
5029   Address DependenciesArray = Address::invalid();
5030   llvm::Value *NumOfElements;
5031   std::tie(NumOfElements, DependenciesArray) =
5032       emitDependClause(CGF, Data.Dependences, Loc);
5033 
5034   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5035   // libcall.
5036   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5037   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5038   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5039   // list is not empty
5040   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5041   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5042   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5043   llvm::Value *DepTaskArgs[7];
5044   if (!Data.Dependences.empty()) {
5045     DepTaskArgs[0] = UpLoc;
5046     DepTaskArgs[1] = ThreadID;
5047     DepTaskArgs[2] = NewTask;
5048     DepTaskArgs[3] = NumOfElements;
5049     DepTaskArgs[4] = DependenciesArray.getPointer();
5050     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5051     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5052   }
5053   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
5054                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5055     if (!Data.Tied) {
5056       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5057       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5058       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5059     }
5060     if (!Data.Dependences.empty()) {
5061       CGF.EmitRuntimeCall(
5062           OMPBuilder.getOrCreateRuntimeFunction(
5063               CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
5064           DepTaskArgs);
5065     } else {
5066       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5067                               CGM.getModule(), OMPRTL___kmpc_omp_task),
5068                           TaskArgs);
5069     }
5070     // Check if parent region is untied and build return for untied task;
5071     if (auto *Region =
5072             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5073       Region->emitUntiedSwitch(CGF);
5074   };
5075 
5076   llvm::Value *DepWaitTaskArgs[6];
5077   if (!Data.Dependences.empty()) {
5078     DepWaitTaskArgs[0] = UpLoc;
5079     DepWaitTaskArgs[1] = ThreadID;
5080     DepWaitTaskArgs[2] = NumOfElements;
5081     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5082     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5083     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5084   }
5085   auto &M = CGM.getModule();
5086   auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
5087                         TaskEntry, &Data, &DepWaitTaskArgs,
5088                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5089     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5090     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5091     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5092     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5093     // is specified.
5094     if (!Data.Dependences.empty())
5095       CGF.EmitRuntimeCall(
5096           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
5097           DepWaitTaskArgs);
5098     // Call proxy_task_entry(gtid, new_task);
5099     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5100                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5101       Action.Enter(CGF);
5102       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5103       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5104                                                           OutlinedFnArgs);
5105     };
5106 
5107     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5108     // kmp_task_t *new_task);
5109     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5110     // kmp_task_t *new_task);
5111     RegionCodeGenTy RCG(CodeGen);
5112     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
5113                               M, OMPRTL___kmpc_omp_task_begin_if0),
5114                           TaskArgs,
5115                           OMPBuilder.getOrCreateRuntimeFunction(
5116                               M, OMPRTL___kmpc_omp_task_complete_if0),
5117                           TaskArgs);
5118     RCG.setAction(Action);
5119     RCG(CGF);
5120   };
5121 
5122   if (IfCond) {
5123     emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5124   } else {
5125     RegionCodeGenTy ThenRCG(ThenCodeGen);
5126     ThenRCG(CGF);
5127   }
5128 }
5129 
5130 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5131                                        const OMPLoopDirective &D,
5132                                        llvm::Function *TaskFunction,
5133                                        QualType SharedsTy, Address Shareds,
5134                                        const Expr *IfCond,
5135                                        const OMPTaskDataTy &Data) {
5136   if (!CGF.HaveInsertPoint())
5137     return;
5138   TaskResultTy Result =
5139       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5140   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5141   // libcall.
5142   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5143   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5144   // sched, kmp_uint64 grainsize, void *task_dup);
5145   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5146   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5147   llvm::Value *IfVal;
5148   if (IfCond) {
5149     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5150                                       /*isSigned=*/true);
5151   } else {
5152     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5153   }
5154 
5155   LValue LBLVal = CGF.EmitLValueForField(
5156       Result.TDBase,
5157       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5158   const auto *LBVar =
5159       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5160   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5161                        LBLVal.getQuals(),
5162                        /*IsInitializer=*/true);
5163   LValue UBLVal = CGF.EmitLValueForField(
5164       Result.TDBase,
5165       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5166   const auto *UBVar =
5167       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5168   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5169                        UBLVal.getQuals(),
5170                        /*IsInitializer=*/true);
5171   LValue StLVal = CGF.EmitLValueForField(
5172       Result.TDBase,
5173       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5174   const auto *StVar =
5175       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5176   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5177                        StLVal.getQuals(),
5178                        /*IsInitializer=*/true);
5179   // Store reductions address.
5180   LValue RedLVal = CGF.EmitLValueForField(
5181       Result.TDBase,
5182       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5183   if (Data.Reductions) {
5184     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5185   } else {
5186     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5187                                CGF.getContext().VoidPtrTy);
5188   }
5189   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5190   llvm::Value *TaskArgs[] = {
5191       UpLoc,
5192       ThreadID,
5193       Result.NewTask,
5194       IfVal,
5195       LBLVal.getPointer(CGF),
5196       UBLVal.getPointer(CGF),
5197       CGF.EmitLoadOfScalar(StLVal, Loc),
5198       llvm::ConstantInt::getSigned(
5199           CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5200       llvm::ConstantInt::getSigned(
5201           CGF.IntTy, Data.Schedule.getPointer()
5202                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5203                          : NoSchedule),
5204       Data.Schedule.getPointer()
5205           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5206                                       /*isSigned=*/false)
5207           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5208       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5209                              Result.TaskDupFn, CGF.VoidPtrTy)
5210                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5211   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5212                           CGM.getModule(), OMPRTL___kmpc_taskloop),
5213                       TaskArgs);
5214 }
5215 
5216 /// Emit reduction operation for each element of array (required for
5217 /// array sections) LHS op = RHS.
5218 /// \param Type Type of array.
5219 /// \param LHSVar Variable on the left side of the reduction operation
5220 /// (references element of array in original variable).
5221 /// \param RHSVar Variable on the right side of the reduction operation
5222 /// (references element of array in original variable).
5223 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5224 /// RHSVar.
5225 static void EmitOMPAggregateReduction(
5226     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5227     const VarDecl *RHSVar,
5228     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5229                                   const Expr *, const Expr *)> &RedOpGen,
5230     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5231     const Expr *UpExpr = nullptr) {
5232   // Perform element-by-element initialization.
5233   QualType ElementTy;
5234   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5235   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5236 
5237   // Drill down to the base element type on both arrays.
5238   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5239   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5240 
5241   llvm::Value *RHSBegin = RHSAddr.getPointer();
5242   llvm::Value *LHSBegin = LHSAddr.getPointer();
5243   // Cast from pointer to array type to pointer to single element.
5244   llvm::Value *LHSEnd =
5245       CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
5246   // The basic structure here is a while-do loop.
5247   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5248   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5249   llvm::Value *IsEmpty =
5250       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5251   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5252 
5253   // Enter the loop body, making that address the current address.
5254   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5255   CGF.EmitBlock(BodyBB);
5256 
5257   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5258 
5259   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5260       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5261   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5262   Address RHSElementCurrent(
5263       RHSElementPHI, RHSAddr.getElementType(),
5264       RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5265 
5266   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5267       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5268   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5269   Address LHSElementCurrent(
5270       LHSElementPHI, LHSAddr.getElementType(),
5271       LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5272 
5273   // Emit copy.
5274   CodeGenFunction::OMPPrivateScope Scope(CGF);
5275   Scope.addPrivate(LHSVar, LHSElementCurrent);
5276   Scope.addPrivate(RHSVar, RHSElementCurrent);
5277   Scope.Privatize();
5278   RedOpGen(CGF, XExpr, EExpr, UpExpr);
5279   Scope.ForceCleanup();
5280 
5281   // Shift the address forward by one element.
5282   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5283       LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
5284       "omp.arraycpy.dest.element");
5285   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5286       RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
5287       "omp.arraycpy.src.element");
5288   // Check whether we've reached the end.
5289   llvm::Value *Done =
5290       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5291   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5292   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5293   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5294 
5295   // Done.
5296   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5297 }
5298 
5299 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5300 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5301 /// UDR combiner function.
5302 static void emitReductionCombiner(CodeGenFunction &CGF,
5303                                   const Expr *ReductionOp) {
5304   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5305     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5306       if (const auto *DRE =
5307               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5308         if (const auto *DRD =
5309                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5310           std::pair<llvm::Function *, llvm::Function *> Reduction =
5311               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5312           RValue Func = RValue::get(Reduction.first);
5313           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5314           CGF.EmitIgnoredExpr(ReductionOp);
5315           return;
5316         }
5317   CGF.EmitIgnoredExpr(ReductionOp);
5318 }
5319 
5320 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5321     SourceLocation Loc, llvm::Type *ArgsElemType,
5322     ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
5323     ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
5324   ASTContext &C = CGM.getContext();
5325 
5326   // void reduction_func(void *LHSArg, void *RHSArg);
5327   FunctionArgList Args;
5328   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5329                            ImplicitParamDecl::Other);
5330   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5331                            ImplicitParamDecl::Other);
5332   Args.push_back(&LHSArg);
5333   Args.push_back(&RHSArg);
5334   const auto &CGFI =
5335       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5336   std::string Name = getName({"omp", "reduction", "reduction_func"});
5337   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5338                                     llvm::GlobalValue::InternalLinkage, Name,
5339                                     &CGM.getModule());
5340   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5341   Fn->setDoesNotRecurse();
5342   CodeGenFunction CGF(CGM);
5343   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5344 
5345   // Dst = (void*[n])(LHSArg);
5346   // Src = (void*[n])(RHSArg);
5347   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5348                   CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5349                   ArgsElemType->getPointerTo()),
5350               ArgsElemType, CGF.getPointerAlign());
5351   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5352                   CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5353                   ArgsElemType->getPointerTo()),
5354               ArgsElemType, CGF.getPointerAlign());
5355 
5356   //  ...
5357   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5358   //  ...
5359   CodeGenFunction::OMPPrivateScope Scope(CGF);
5360   const auto *IPriv = Privates.begin();
5361   unsigned Idx = 0;
5362   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5363     const auto *RHSVar =
5364         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5365     Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
5366     const auto *LHSVar =
5367         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5368     Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
5369     QualType PrivTy = (*IPriv)->getType();
5370     if (PrivTy->isVariablyModifiedType()) {
5371       // Get array size and emit VLA type.
5372       ++Idx;
5373       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5374       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5375       const VariableArrayType *VLA =
5376           CGF.getContext().getAsVariableArrayType(PrivTy);
5377       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5378       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5379           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5380       CGF.EmitVariablyModifiedType(PrivTy);
5381     }
5382   }
5383   Scope.Privatize();
5384   IPriv = Privates.begin();
5385   const auto *ILHS = LHSExprs.begin();
5386   const auto *IRHS = RHSExprs.begin();
5387   for (const Expr *E : ReductionOps) {
5388     if ((*IPriv)->getType()->isArrayType()) {
5389       // Emit reduction for array section.
5390       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5391       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5392       EmitOMPAggregateReduction(
5393           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5394           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5395             emitReductionCombiner(CGF, E);
5396           });
5397     } else {
5398       // Emit reduction for array subscript or single variable.
5399       emitReductionCombiner(CGF, E);
5400     }
5401     ++IPriv;
5402     ++ILHS;
5403     ++IRHS;
5404   }
5405   Scope.ForceCleanup();
5406   CGF.FinishFunction();
5407   return Fn;
5408 }
5409 
5410 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5411                                                   const Expr *ReductionOp,
5412                                                   const Expr *PrivateRef,
5413                                                   const DeclRefExpr *LHS,
5414                                                   const DeclRefExpr *RHS) {
5415   if (PrivateRef->getType()->isArrayType()) {
5416     // Emit reduction for array section.
5417     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5418     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5419     EmitOMPAggregateReduction(
5420         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5421         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5422           emitReductionCombiner(CGF, ReductionOp);
5423         });
5424   } else {
5425     // Emit reduction for array subscript or single variable.
5426     emitReductionCombiner(CGF, ReductionOp);
5427   }
5428 }
5429 
5430 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5431                                     ArrayRef<const Expr *> Privates,
5432                                     ArrayRef<const Expr *> LHSExprs,
5433                                     ArrayRef<const Expr *> RHSExprs,
5434                                     ArrayRef<const Expr *> ReductionOps,
5435                                     ReductionOptionsTy Options) {
5436   if (!CGF.HaveInsertPoint())
5437     return;
5438 
5439   bool WithNowait = Options.WithNowait;
5440   bool SimpleReduction = Options.SimpleReduction;
5441 
5442   // Next code should be emitted for reduction:
5443   //
5444   // static kmp_critical_name lock = { 0 };
5445   //
5446   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5447   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5448   //  ...
5449   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5450   //  *(Type<n>-1*)rhs[<n>-1]);
5451   // }
5452   //
5453   // ...
5454   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5455   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5456   // RedList, reduce_func, &<lock>)) {
5457   // case 1:
5458   //  ...
5459   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5460   //  ...
5461   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5462   // break;
5463   // case 2:
5464   //  ...
5465   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5466   //  ...
5467   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5468   // break;
5469   // default:;
5470   // }
5471   //
5472   // if SimpleReduction is true, only the next code is generated:
5473   //  ...
5474   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5475   //  ...
5476 
5477   ASTContext &C = CGM.getContext();
5478 
5479   if (SimpleReduction) {
5480     CodeGenFunction::RunCleanupsScope Scope(CGF);
5481     const auto *IPriv = Privates.begin();
5482     const auto *ILHS = LHSExprs.begin();
5483     const auto *IRHS = RHSExprs.begin();
5484     for (const Expr *E : ReductionOps) {
5485       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5486                                   cast<DeclRefExpr>(*IRHS));
5487       ++IPriv;
5488       ++ILHS;
5489       ++IRHS;
5490     }
5491     return;
5492   }
5493 
5494   // 1. Build a list of reduction variables.
5495   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5496   auto Size = RHSExprs.size();
5497   for (const Expr *E : Privates) {
5498     if (E->getType()->isVariablyModifiedType())
5499       // Reserve place for array size.
5500       ++Size;
5501   }
5502   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5503   QualType ReductionArrayTy =
5504       C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
5505                              /*IndexTypeQuals=*/0);
5506   Address ReductionList =
5507       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5508   const auto *IPriv = Privates.begin();
5509   unsigned Idx = 0;
5510   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5511     Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5512     CGF.Builder.CreateStore(
5513         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5514             CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5515         Elem);
5516     if ((*IPriv)->getType()->isVariablyModifiedType()) {
5517       // Store array size.
5518       ++Idx;
5519       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5520       llvm::Value *Size = CGF.Builder.CreateIntCast(
5521           CGF.getVLASize(
5522                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5523               .NumElts,
5524           CGF.SizeTy, /*isSigned=*/false);
5525       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5526                               Elem);
5527     }
5528   }
5529 
5530   // 2. Emit reduce_func().
5531   llvm::Function *ReductionFn =
5532       emitReductionFunction(Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5533                             Privates, LHSExprs, RHSExprs, ReductionOps);
5534 
5535   // 3. Create static kmp_critical_name lock = { 0 };
5536   std::string Name = getName({"reduction"});
5537   llvm::Value *Lock = getCriticalRegionLock(Name);
5538 
5539   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5540   // RedList, reduce_func, &<lock>);
5541   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5542   llvm::Value *ThreadId = getThreadID(CGF, Loc);
5543   llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5544   llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5545       ReductionList.getPointer(), CGF.VoidPtrTy);
5546   llvm::Value *Args[] = {
5547       IdentTLoc,                             // ident_t *<loc>
5548       ThreadId,                              // i32 <gtid>
5549       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5550       ReductionArrayTySize,                  // size_type sizeof(RedList)
5551       RL,                                    // void *RedList
5552       ReductionFn, // void (*) (void *, void *) <reduce_func>
5553       Lock         // kmp_critical_name *&<lock>
5554   };
5555   llvm::Value *Res = CGF.EmitRuntimeCall(
5556       OMPBuilder.getOrCreateRuntimeFunction(
5557           CGM.getModule(),
5558           WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5559       Args);
5560 
5561   // 5. Build switch(res)
5562   llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5563   llvm::SwitchInst *SwInst =
5564       CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5565 
5566   // 6. Build case 1:
5567   //  ...
5568   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5569   //  ...
5570   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5571   // break;
5572   llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5573   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5574   CGF.EmitBlock(Case1BB);
5575 
5576   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5577   llvm::Value *EndArgs[] = {
5578       IdentTLoc, // ident_t *<loc>
5579       ThreadId,  // i32 <gtid>
5580       Lock       // kmp_critical_name *&<lock>
5581   };
5582   auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5583                        CodeGenFunction &CGF, PrePostActionTy &Action) {
5584     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5585     const auto *IPriv = Privates.begin();
5586     const auto *ILHS = LHSExprs.begin();
5587     const auto *IRHS = RHSExprs.begin();
5588     for (const Expr *E : ReductionOps) {
5589       RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5590                                      cast<DeclRefExpr>(*IRHS));
5591       ++IPriv;
5592       ++ILHS;
5593       ++IRHS;
5594     }
5595   };
5596   RegionCodeGenTy RCG(CodeGen);
5597   CommonActionTy Action(
5598       nullptr, llvm::None,
5599       OMPBuilder.getOrCreateRuntimeFunction(
5600           CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5601                                       : OMPRTL___kmpc_end_reduce),
5602       EndArgs);
5603   RCG.setAction(Action);
5604   RCG(CGF);
5605 
5606   CGF.EmitBranch(DefaultBB);
5607 
5608   // 7. Build case 2:
5609   //  ...
5610   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5611   //  ...
5612   // break;
5613   llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5614   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5615   CGF.EmitBlock(Case2BB);
5616 
5617   auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5618                              CodeGenFunction &CGF, PrePostActionTy &Action) {
5619     const auto *ILHS = LHSExprs.begin();
5620     const auto *IRHS = RHSExprs.begin();
5621     const auto *IPriv = Privates.begin();
5622     for (const Expr *E : ReductionOps) {
5623       const Expr *XExpr = nullptr;
5624       const Expr *EExpr = nullptr;
5625       const Expr *UpExpr = nullptr;
5626       BinaryOperatorKind BO = BO_Comma;
5627       if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5628         if (BO->getOpcode() == BO_Assign) {
5629           XExpr = BO->getLHS();
5630           UpExpr = BO->getRHS();
5631         }
5632       }
5633       // Try to emit update expression as a simple atomic.
5634       const Expr *RHSExpr = UpExpr;
5635       if (RHSExpr) {
5636         // Analyze RHS part of the whole expression.
5637         if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5638                 RHSExpr->IgnoreParenImpCasts())) {
5639           // If this is a conditional operator, analyze its condition for
5640           // min/max reduction operator.
5641           RHSExpr = ACO->getCond();
5642         }
5643         if (const auto *BORHS =
5644                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5645           EExpr = BORHS->getRHS();
5646           BO = BORHS->getOpcode();
5647         }
5648       }
5649       if (XExpr) {
5650         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5651         auto &&AtomicRedGen = [BO, VD,
5652                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
5653                                     const Expr *EExpr, const Expr *UpExpr) {
5654           LValue X = CGF.EmitLValue(XExpr);
5655           RValue E;
5656           if (EExpr)
5657             E = CGF.EmitAnyExpr(EExpr);
5658           CGF.EmitOMPAtomicSimpleUpdateExpr(
5659               X, E, BO, /*IsXLHSInRHSPart=*/true,
5660               llvm::AtomicOrdering::Monotonic, Loc,
5661               [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5662                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5663                 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5664                 CGF.emitOMPSimpleStore(
5665                     CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5666                     VD->getType().getNonReferenceType(), Loc);
5667                 PrivateScope.addPrivate(VD, LHSTemp);
5668                 (void)PrivateScope.Privatize();
5669                 return CGF.EmitAnyExpr(UpExpr);
5670               });
5671         };
5672         if ((*IPriv)->getType()->isArrayType()) {
5673           // Emit atomic reduction for array section.
5674           const auto *RHSVar =
5675               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5676           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5677                                     AtomicRedGen, XExpr, EExpr, UpExpr);
5678         } else {
5679           // Emit atomic reduction for array subscript or single variable.
5680           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5681         }
5682       } else {
5683         // Emit as a critical region.
5684         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5685                                            const Expr *, const Expr *) {
5686           CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5687           std::string Name = RT.getName({"atomic_reduction"});
5688           RT.emitCriticalRegion(
5689               CGF, Name,
5690               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5691                 Action.Enter(CGF);
5692                 emitReductionCombiner(CGF, E);
5693               },
5694               Loc);
5695         };
5696         if ((*IPriv)->getType()->isArrayType()) {
5697           const auto *LHSVar =
5698               cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5699           const auto *RHSVar =
5700               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5701           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5702                                     CritRedGen);
5703         } else {
5704           CritRedGen(CGF, nullptr, nullptr, nullptr);
5705         }
5706       }
5707       ++ILHS;
5708       ++IRHS;
5709       ++IPriv;
5710     }
5711   };
5712   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5713   if (!WithNowait) {
5714     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5715     llvm::Value *EndArgs[] = {
5716         IdentTLoc, // ident_t *<loc>
5717         ThreadId,  // i32 <gtid>
5718         Lock       // kmp_critical_name *&<lock>
5719     };
5720     CommonActionTy Action(nullptr, llvm::None,
5721                           OMPBuilder.getOrCreateRuntimeFunction(
5722                               CGM.getModule(), OMPRTL___kmpc_end_reduce),
5723                           EndArgs);
5724     AtomicRCG.setAction(Action);
5725     AtomicRCG(CGF);
5726   } else {
5727     AtomicRCG(CGF);
5728   }
5729 
5730   CGF.EmitBranch(DefaultBB);
5731   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5732 }
5733 
5734 /// Generates unique name for artificial threadprivate variables.
5735 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5736 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5737                                       const Expr *Ref) {
5738   SmallString<256> Buffer;
5739   llvm::raw_svector_ostream Out(Buffer);
5740   const clang::DeclRefExpr *DE;
5741   const VarDecl *D = ::getBaseDecl(Ref, DE);
5742   if (!D)
5743     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5744   D = D->getCanonicalDecl();
5745   std::string Name = CGM.getOpenMPRuntime().getName(
5746       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5747   Out << Prefix << Name << "_"
5748       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5749   return std::string(Out.str());
5750 }
5751 
5752 /// Emits reduction initializer function:
5753 /// \code
5754 /// void @.red_init(void* %arg, void* %orig) {
5755 /// %0 = bitcast void* %arg to <type>*
5756 /// store <type> <init>, <type>* %0
5757 /// ret void
5758 /// }
5759 /// \endcode
5760 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5761                                            SourceLocation Loc,
5762                                            ReductionCodeGen &RCG, unsigned N) {
5763   ASTContext &C = CGM.getContext();
5764   QualType VoidPtrTy = C.VoidPtrTy;
5765   VoidPtrTy.addRestrict();
5766   FunctionArgList Args;
5767   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5768                           ImplicitParamDecl::Other);
5769   ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5770                               ImplicitParamDecl::Other);
5771   Args.emplace_back(&Param);
5772   Args.emplace_back(&ParamOrig);
5773   const auto &FnInfo =
5774       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5775   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5776   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5777   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5778                                     Name, &CGM.getModule());
5779   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5780   Fn->setDoesNotRecurse();
5781   CodeGenFunction CGF(CGM);
5782   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5783   QualType PrivateType = RCG.getPrivateType(N);
5784   Address PrivateAddr = CGF.EmitLoadOfPointer(
5785       CGF.Builder.CreateElementBitCast(
5786           CGF.GetAddrOfLocalVar(&Param),
5787           CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
5788       C.getPointerType(PrivateType)->castAs<PointerType>());
5789   llvm::Value *Size = nullptr;
5790   // If the size of the reduction item is non-constant, load it from global
5791   // threadprivate variable.
5792   if (RCG.getSizes(N).second) {
5793     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5794         CGF, CGM.getContext().getSizeType(),
5795         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5796     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5797                                 CGM.getContext().getSizeType(), Loc);
5798   }
5799   RCG.emitAggregateType(CGF, N, Size);
5800   Address OrigAddr = Address::invalid();
5801   // If initializer uses initializer from declare reduction construct, emit a
5802   // pointer to the address of the original reduction item (reuired by reduction
5803   // initializer)
5804   if (RCG.usesReductionInitializer(N)) {
5805     Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5806     OrigAddr = CGF.EmitLoadOfPointer(
5807         SharedAddr,
5808         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5809   }
5810   // Emit the initializer:
5811   // %0 = bitcast void* %arg to <type>*
5812   // store <type> <init>, <type>* %0
5813   RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5814                          [](CodeGenFunction &) { return false; });
5815   CGF.FinishFunction();
5816   return Fn;
5817 }
5818 
5819 /// Emits reduction combiner function:
5820 /// \code
5821 /// void @.red_comb(void* %arg0, void* %arg1) {
5822 /// %lhs = bitcast void* %arg0 to <type>*
5823 /// %rhs = bitcast void* %arg1 to <type>*
5824 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5825 /// store <type> %2, <type>* %lhs
5826 /// ret void
5827 /// }
5828 /// \endcode
5829 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5830                                            SourceLocation Loc,
5831                                            ReductionCodeGen &RCG, unsigned N,
5832                                            const Expr *ReductionOp,
5833                                            const Expr *LHS, const Expr *RHS,
5834                                            const Expr *PrivateRef) {
5835   ASTContext &C = CGM.getContext();
5836   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5837   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5838   FunctionArgList Args;
5839   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5840                                C.VoidPtrTy, ImplicitParamDecl::Other);
5841   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5842                             ImplicitParamDecl::Other);
5843   Args.emplace_back(&ParamInOut);
5844   Args.emplace_back(&ParamIn);
5845   const auto &FnInfo =
5846       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5847   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5848   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5849   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5850                                     Name, &CGM.getModule());
5851   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5852   Fn->setDoesNotRecurse();
5853   CodeGenFunction CGF(CGM);
5854   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5855   llvm::Value *Size = nullptr;
5856   // If the size of the reduction item is non-constant, load it from global
5857   // threadprivate variable.
5858   if (RCG.getSizes(N).second) {
5859     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5860         CGF, CGM.getContext().getSizeType(),
5861         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5862     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5863                                 CGM.getContext().getSizeType(), Loc);
5864   }
5865   RCG.emitAggregateType(CGF, N, Size);
5866   // Remap lhs and rhs variables to the addresses of the function arguments.
5867   // %lhs = bitcast void* %arg0 to <type>*
5868   // %rhs = bitcast void* %arg1 to <type>*
5869   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5870   PrivateScope.addPrivate(
5871       LHSVD,
5872       // Pull out the pointer to the variable.
5873       CGF.EmitLoadOfPointer(
5874           CGF.Builder.CreateElementBitCast(
5875               CGF.GetAddrOfLocalVar(&ParamInOut),
5876               CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
5877           C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5878   PrivateScope.addPrivate(
5879       RHSVD,
5880       // Pull out the pointer to the variable.
5881       CGF.EmitLoadOfPointer(
5882           CGF.Builder.CreateElementBitCast(
5883             CGF.GetAddrOfLocalVar(&ParamIn),
5884             CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
5885           C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5886   PrivateScope.Privatize();
5887   // Emit the combiner body:
5888   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5889   // store <type> %2, <type>* %lhs
5890   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5891       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5892       cast<DeclRefExpr>(RHS));
5893   CGF.FinishFunction();
5894   return Fn;
5895 }
5896 
5897 /// Emits reduction finalizer function:
5898 /// \code
5899 /// void @.red_fini(void* %arg) {
5900 /// %0 = bitcast void* %arg to <type>*
5901 /// <destroy>(<type>* %0)
5902 /// ret void
5903 /// }
5904 /// \endcode
5905 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5906                                            SourceLocation Loc,
5907                                            ReductionCodeGen &RCG, unsigned N) {
5908   if (!RCG.needCleanups(N))
5909     return nullptr;
5910   ASTContext &C = CGM.getContext();
5911   FunctionArgList Args;
5912   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5913                           ImplicitParamDecl::Other);
5914   Args.emplace_back(&Param);
5915   const auto &FnInfo =
5916       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5917   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5918   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5919   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5920                                     Name, &CGM.getModule());
5921   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5922   Fn->setDoesNotRecurse();
5923   CodeGenFunction CGF(CGM);
5924   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5925   Address PrivateAddr = CGF.EmitLoadOfPointer(
5926       CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
5927   llvm::Value *Size = nullptr;
5928   // If the size of the reduction item is non-constant, load it from global
5929   // threadprivate variable.
5930   if (RCG.getSizes(N).second) {
5931     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5932         CGF, CGM.getContext().getSizeType(),
5933         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5934     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5935                                 CGM.getContext().getSizeType(), Loc);
5936   }
5937   RCG.emitAggregateType(CGF, N, Size);
5938   // Emit the finalizer body:
5939   // <destroy>(<type>* %0)
5940   RCG.emitCleanups(CGF, N, PrivateAddr);
5941   CGF.FinishFunction(Loc);
5942   return Fn;
5943 }
5944 
5945 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5946     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5947     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5948   if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5949     return nullptr;
5950 
5951   // Build typedef struct:
5952   // kmp_taskred_input {
5953   //   void *reduce_shar; // shared reduction item
5954   //   void *reduce_orig; // original reduction item used for initialization
5955   //   size_t reduce_size; // size of data item
5956   //   void *reduce_init; // data initialization routine
5957   //   void *reduce_fini; // data finalization routine
5958   //   void *reduce_comb; // data combiner routine
5959   //   kmp_task_red_flags_t flags; // flags for additional info from compiler
5960   // } kmp_taskred_input_t;
5961   ASTContext &C = CGM.getContext();
5962   RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5963   RD->startDefinition();
5964   const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5965   const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5966   const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5967   const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5968   const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5969   const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5970   const FieldDecl *FlagsFD = addFieldToRecordDecl(
5971       C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5972   RD->completeDefinition();
5973   QualType RDType = C.getRecordType(RD);
5974   unsigned Size = Data.ReductionVars.size();
5975   llvm::APInt ArraySize(/*numBits=*/64, Size);
5976   QualType ArrayRDType = C.getConstantArrayType(
5977       RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
5978   // kmp_task_red_input_t .rd_input.[Size];
5979   Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5980   ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5981                        Data.ReductionCopies, Data.ReductionOps);
5982   for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5983     // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5984     llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5985                            llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5986     llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5987         TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5988         /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5989         ".rd_input.gep.");
5990     LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
5991     // ElemLVal.reduce_shar = &Shareds[Cnt];
5992     LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5993     RCG.emitSharedOrigLValue(CGF, Cnt);
5994     llvm::Value *CastedShared =
5995         CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
5996     CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
5997     // ElemLVal.reduce_orig = &Origs[Cnt];
5998     LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5999     llvm::Value *CastedOrig =
6000         CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
6001     CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
6002     RCG.emitAggregateType(CGF, Cnt);
6003     llvm::Value *SizeValInChars;
6004     llvm::Value *SizeVal;
6005     std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6006     // We use delayed creation/initialization for VLAs and array sections. It is
6007     // required because runtime does not provide the way to pass the sizes of
6008     // VLAs/array sections to initializer/combiner/finalizer functions. Instead
6009     // threadprivate global variables are used to store these values and use
6010     // them in the functions.
6011     bool DelayedCreation = !!SizeVal;
6012     SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6013                                                /*isSigned=*/false);
6014     LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6015     CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6016     // ElemLVal.reduce_init = init;
6017     LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6018     llvm::Value *InitAddr =
6019         CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6020     CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6021     // ElemLVal.reduce_fini = fini;
6022     LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6023     llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6024     llvm::Value *FiniAddr = Fini
6025                                 ? CGF.EmitCastToVoidPtr(Fini)
6026                                 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6027     CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6028     // ElemLVal.reduce_comb = comb;
6029     LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6030     llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6031         CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6032         RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6033     CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6034     // ElemLVal.flags = 0;
6035     LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6036     if (DelayedCreation) {
6037       CGF.EmitStoreOfScalar(
6038           llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6039           FlagsLVal);
6040     } else
6041       CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
6042                                  FlagsLVal.getType());
6043   }
6044   if (Data.IsReductionWithTaskMod) {
6045     // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6046     // is_ws, int num, void *data);
6047     llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6048     llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6049                                                   CGM.IntTy, /*isSigned=*/true);
6050     llvm::Value *Args[] = {
6051         IdentTLoc, GTid,
6052         llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
6053                                /*isSigned=*/true),
6054         llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6055         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6056             TaskRedInput.getPointer(), CGM.VoidPtrTy)};
6057     return CGF.EmitRuntimeCall(
6058         OMPBuilder.getOrCreateRuntimeFunction(
6059             CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
6060         Args);
6061   }
6062   // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
6063   llvm::Value *Args[] = {
6064       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6065                                 /*isSigned=*/true),
6066       llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6067       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6068                                                       CGM.VoidPtrTy)};
6069   return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6070                                  CGM.getModule(), OMPRTL___kmpc_taskred_init),
6071                              Args);
6072 }
6073 
6074 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6075                                             SourceLocation Loc,
6076                                             bool IsWorksharingReduction) {
6077   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6078   // is_ws, int num, void *data);
6079   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6080   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6081                                                 CGM.IntTy, /*isSigned=*/true);
6082   llvm::Value *Args[] = {IdentTLoc, GTid,
6083                          llvm::ConstantInt::get(CGM.IntTy,
6084                                                 IsWorksharingReduction ? 1 : 0,
6085                                                 /*isSigned=*/true)};
6086   (void)CGF.EmitRuntimeCall(
6087       OMPBuilder.getOrCreateRuntimeFunction(
6088           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6089       Args);
6090 }
6091 
6092 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6093                                               SourceLocation Loc,
6094                                               ReductionCodeGen &RCG,
6095                                               unsigned N) {
6096   auto Sizes = RCG.getSizes(N);
6097   // Emit threadprivate global variable if the type is non-constant
6098   // (Sizes.second = nullptr).
6099   if (Sizes.second) {
6100     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6101                                                      /*isSigned=*/false);
6102     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6103         CGF, CGM.getContext().getSizeType(),
6104         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6105     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6106   }
6107 }
6108 
6109 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6110                                               SourceLocation Loc,
6111                                               llvm::Value *ReductionsPtr,
6112                                               LValue SharedLVal) {
6113   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6114   // *d);
6115   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6116                                                    CGM.IntTy,
6117                                                    /*isSigned=*/true),
6118                          ReductionsPtr,
6119                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6120                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6121   return Address(
6122       CGF.EmitRuntimeCall(
6123           OMPBuilder.getOrCreateRuntimeFunction(
6124               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6125           Args),
6126       CGF.Int8Ty, SharedLVal.getAlignment());
6127 }
6128 
6129 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
6130                                        const OMPTaskDataTy &Data) {
6131   if (!CGF.HaveInsertPoint())
6132     return;
6133 
6134   if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
6135     // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
6136     OMPBuilder.createTaskwait(CGF.Builder);
6137   } else {
6138     llvm::Value *ThreadID = getThreadID(CGF, Loc);
6139     llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
6140     auto &M = CGM.getModule();
6141     Address DependenciesArray = Address::invalid();
6142     llvm::Value *NumOfElements;
6143     std::tie(NumOfElements, DependenciesArray) =
6144         emitDependClause(CGF, Data.Dependences, Loc);
6145     llvm::Value *DepWaitTaskArgs[6];
6146     if (!Data.Dependences.empty()) {
6147       DepWaitTaskArgs[0] = UpLoc;
6148       DepWaitTaskArgs[1] = ThreadID;
6149       DepWaitTaskArgs[2] = NumOfElements;
6150       DepWaitTaskArgs[3] = DependenciesArray.getPointer();
6151       DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
6152       DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6153 
6154       CodeGenFunction::RunCleanupsScope LocalScope(CGF);
6155 
6156       // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
6157       // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
6158       // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
6159       // is specified.
6160       CGF.EmitRuntimeCall(
6161           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
6162           DepWaitTaskArgs);
6163 
6164     } else {
6165 
6166       // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6167       // global_tid);
6168       llvm::Value *Args[] = {UpLoc, ThreadID};
6169       // Ignore return result until untied tasks are supported.
6170       CGF.EmitRuntimeCall(
6171           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
6172           Args);
6173     }
6174   }
6175 
6176   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6177     Region->emitUntiedSwitch(CGF);
6178 }
6179 
6180 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6181                                            OpenMPDirectiveKind InnerKind,
6182                                            const RegionCodeGenTy &CodeGen,
6183                                            bool HasCancel) {
6184   if (!CGF.HaveInsertPoint())
6185     return;
6186   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6187                                  InnerKind != OMPD_critical &&
6188                                      InnerKind != OMPD_master &&
6189                                      InnerKind != OMPD_masked);
6190   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6191 }
6192 
6193 namespace {
6194 enum RTCancelKind {
6195   CancelNoreq = 0,
6196   CancelParallel = 1,
6197   CancelLoop = 2,
6198   CancelSections = 3,
6199   CancelTaskgroup = 4
6200 };
6201 } // anonymous namespace
6202 
6203 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6204   RTCancelKind CancelKind = CancelNoreq;
6205   if (CancelRegion == OMPD_parallel)
6206     CancelKind = CancelParallel;
6207   else if (CancelRegion == OMPD_for)
6208     CancelKind = CancelLoop;
6209   else if (CancelRegion == OMPD_sections)
6210     CancelKind = CancelSections;
6211   else {
6212     assert(CancelRegion == OMPD_taskgroup);
6213     CancelKind = CancelTaskgroup;
6214   }
6215   return CancelKind;
6216 }
6217 
6218 void CGOpenMPRuntime::emitCancellationPointCall(
6219     CodeGenFunction &CGF, SourceLocation Loc,
6220     OpenMPDirectiveKind CancelRegion) {
6221   if (!CGF.HaveInsertPoint())
6222     return;
6223   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6224   // global_tid, kmp_int32 cncl_kind);
6225   if (auto *OMPRegionInfo =
6226           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6227     // For 'cancellation point taskgroup', the task region info may not have a
6228     // cancel. This may instead happen in another adjacent task.
6229     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6230       llvm::Value *Args[] = {
6231           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6232           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6233       // Ignore return result until untied tasks are supported.
6234       llvm::Value *Result = CGF.EmitRuntimeCall(
6235           OMPBuilder.getOrCreateRuntimeFunction(
6236               CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6237           Args);
6238       // if (__kmpc_cancellationpoint()) {
6239       //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6240       //   exit from construct;
6241       // }
6242       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6243       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6244       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6245       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6246       CGF.EmitBlock(ExitBB);
6247       if (CancelRegion == OMPD_parallel)
6248         emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6249       // exit from construct;
6250       CodeGenFunction::JumpDest CancelDest =
6251           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6252       CGF.EmitBranchThroughCleanup(CancelDest);
6253       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6254     }
6255   }
6256 }
6257 
6258 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6259                                      const Expr *IfCond,
6260                                      OpenMPDirectiveKind CancelRegion) {
6261   if (!CGF.HaveInsertPoint())
6262     return;
6263   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6264   // kmp_int32 cncl_kind);
6265   auto &M = CGM.getModule();
6266   if (auto *OMPRegionInfo =
6267           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6268     auto &&ThenGen = [this, &M, Loc, CancelRegion,
6269                       OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6270       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6271       llvm::Value *Args[] = {
6272           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6273           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6274       // Ignore return result until untied tasks are supported.
6275       llvm::Value *Result = CGF.EmitRuntimeCall(
6276           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6277       // if (__kmpc_cancel()) {
6278       //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6279       //   exit from construct;
6280       // }
6281       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6282       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6283       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6284       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6285       CGF.EmitBlock(ExitBB);
6286       if (CancelRegion == OMPD_parallel)
6287         RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6288       // exit from construct;
6289       CodeGenFunction::JumpDest CancelDest =
6290           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6291       CGF.EmitBranchThroughCleanup(CancelDest);
6292       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6293     };
6294     if (IfCond) {
6295       emitIfClause(CGF, IfCond, ThenGen,
6296                    [](CodeGenFunction &, PrePostActionTy &) {});
6297     } else {
6298       RegionCodeGenTy ThenRCG(ThenGen);
6299       ThenRCG(CGF);
6300     }
6301   }
6302 }
6303 
6304 namespace {
6305 /// Cleanup action for uses_allocators support.
6306 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6307   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6308 
6309 public:
6310   OMPUsesAllocatorsActionTy(
6311       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6312       : Allocators(Allocators) {}
6313   void Enter(CodeGenFunction &CGF) override {
6314     if (!CGF.HaveInsertPoint())
6315       return;
6316     for (const auto &AllocatorData : Allocators) {
6317       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6318           CGF, AllocatorData.first, AllocatorData.second);
6319     }
6320   }
6321   void Exit(CodeGenFunction &CGF) override {
6322     if (!CGF.HaveInsertPoint())
6323       return;
6324     for (const auto &AllocatorData : Allocators) {
6325       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6326                                                         AllocatorData.first);
6327     }
6328   }
6329 };
6330 } // namespace
6331 
6332 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6333     const OMPExecutableDirective &D, StringRef ParentName,
6334     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6335     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6336   assert(!ParentName.empty() && "Invalid target region parent name!");
6337   HasEmittedTargetRegion = true;
6338   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6339   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6340     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6341       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6342       if (!D.AllocatorTraits)
6343         continue;
6344       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6345     }
6346   }
6347   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6348   CodeGen.setAction(UsesAllocatorAction);
6349   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6350                                    IsOffloadEntry, CodeGen);
6351 }
6352 
6353 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6354                                              const Expr *Allocator,
6355                                              const Expr *AllocatorTraits) {
6356   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6357   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6358   // Use default memspace handle.
6359   llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6360   llvm::Value *NumTraits = llvm::ConstantInt::get(
6361       CGF.IntTy, cast<ConstantArrayType>(
6362                      AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6363                      ->getSize()
6364                      .getLimitedValue());
6365   LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6366   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6367       AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
6368   AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6369                                            AllocatorTraitsLVal.getBaseInfo(),
6370                                            AllocatorTraitsLVal.getTBAAInfo());
6371   llvm::Value *Traits =
6372       CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());
6373 
6374   llvm::Value *AllocatorVal =
6375       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6376                               CGM.getModule(), OMPRTL___kmpc_init_allocator),
6377                           {ThreadId, MemSpaceHandle, NumTraits, Traits});
6378   // Store to allocator.
6379   CGF.EmitVarDecl(*cast<VarDecl>(
6380       cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6381   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6382   AllocatorVal =
6383       CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6384                                Allocator->getType(), Allocator->getExprLoc());
6385   CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6386 }
6387 
6388 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6389                                              const Expr *Allocator) {
6390   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6391   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6392   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6393   llvm::Value *AllocatorVal =
6394       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6395   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6396                                           CGF.getContext().VoidPtrTy,
6397                                           Allocator->getExprLoc());
6398   (void)CGF.EmitRuntimeCall(
6399       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6400                                             OMPRTL___kmpc_destroy_allocator),
6401       {ThreadId, AllocatorVal});
6402 }
6403 
6404 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6405     const OMPExecutableDirective &D, StringRef ParentName,
6406     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6407     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6408   // Create a unique name for the entry function using the source location
6409   // information of the current target region. The name will be something like:
6410   //
6411   // __omp_offloading_DD_FFFF_PP_lBB
6412   //
6413   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6414   // mangled name of the function that encloses the target region and BB is the
6415   // line number of the target region.
6416 
6417   const bool BuildOutlinedFn = CGM.getLangOpts().OpenMPIsDevice ||
6418                                !CGM.getLangOpts().OpenMPOffloadMandatory;
6419   unsigned DeviceID;
6420   unsigned FileID;
6421   unsigned Line;
6422   getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6423                            Line);
6424   SmallString<64> EntryFnName;
6425   {
6426     llvm::raw_svector_ostream OS(EntryFnName);
6427     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6428        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6429   }
6430 
6431   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6432 
6433   CodeGenFunction CGF(CGM, true);
6434   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6435   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6436 
6437   if (BuildOutlinedFn)
6438     OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
6439 
6440   // If this target outline function is not an offload entry, we don't need to
6441   // register it.
6442   if (!IsOffloadEntry)
6443     return;
6444 
6445   // The target region ID is used by the runtime library to identify the current
6446   // target region, so it only has to be unique and not necessarily point to
6447   // anything. It could be the pointer to the outlined function that implements
6448   // the target region, but we aren't using that so that the compiler doesn't
6449   // need to keep that, and could therefore inline the host function if proven
6450   // worthwhile during optimization. In the other hand, if emitting code for the
6451   // device, the ID has to be the function address so that it can retrieved from
6452   // the offloading entry and launched by the runtime library. We also mark the
6453   // outlined function to have external linkage in case we are emitting code for
6454   // the device, because these functions will be entry points to the device.
6455 
6456   if (CGM.getLangOpts().OpenMPIsDevice) {
6457     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6458     OutlinedFn->setLinkage(llvm::GlobalValue::WeakODRLinkage);
6459     OutlinedFn->setDSOLocal(false);
6460     if (CGM.getTriple().isAMDGCN())
6461       OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
6462   } else {
6463     std::string Name = getName({EntryFnName, "region_id"});
6464     OutlinedFnID = new llvm::GlobalVariable(
6465         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6466         llvm::GlobalValue::WeakAnyLinkage,
6467         llvm::Constant::getNullValue(CGM.Int8Ty), Name);
6468   }
6469 
6470   // If we do not allow host fallback we still need a named address to use.
6471   llvm::Constant *TargetRegionEntryAddr = OutlinedFn;
6472   if (!BuildOutlinedFn) {
6473     assert(!CGM.getModule().getGlobalVariable(EntryFnName, true) &&
6474            "Named kernel already exists?");
6475     TargetRegionEntryAddr = new llvm::GlobalVariable(
6476         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6477         llvm::GlobalValue::InternalLinkage,
6478         llvm::Constant::getNullValue(CGM.Int8Ty), EntryFnName);
6479   }
6480 
6481   // Register the information for the entry associated with this target region.
6482   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
6483       DeviceID, FileID, ParentName, Line, TargetRegionEntryAddr, OutlinedFnID,
6484       OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
6485 
6486   // Add NumTeams and ThreadLimit attributes to the outlined GPU function
6487   int32_t DefaultValTeams = -1;
6488   getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
6489   if (DefaultValTeams > 0 && OutlinedFn) {
6490     OutlinedFn->addFnAttr("omp_target_num_teams",
6491                           std::to_string(DefaultValTeams));
6492   }
6493   int32_t DefaultValThreads = -1;
6494   getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
6495   if (DefaultValThreads > 0 && OutlinedFn) {
6496     OutlinedFn->addFnAttr("omp_target_thread_limit",
6497                           std::to_string(DefaultValThreads));
6498   }
6499 
6500   if (BuildOutlinedFn)
6501     CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
6502 }
6503 
6504 /// Checks if the expression is constant or does not have non-trivial function
6505 /// calls.
6506 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6507   // We can skip constant expressions.
6508   // We can skip expressions with trivial calls or simple expressions.
6509   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6510           !E->hasNonTrivialCall(Ctx)) &&
6511          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6512 }
6513 
6514 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6515                                                     const Stmt *Body) {
6516   const Stmt *Child = Body->IgnoreContainers();
6517   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6518     Child = nullptr;
6519     for (const Stmt *S : C->body()) {
6520       if (const auto *E = dyn_cast<Expr>(S)) {
6521         if (isTrivial(Ctx, E))
6522           continue;
6523       }
6524       // Some of the statements can be ignored.
6525       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6526           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6527         continue;
6528       // Analyze declarations.
6529       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6530         if (llvm::all_of(DS->decls(), [](const Decl *D) {
6531               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6532                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6533                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6534                   isa<UsingDirectiveDecl>(D) ||
6535                   isa<OMPDeclareReductionDecl>(D) ||
6536                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6537                 return true;
6538               const auto *VD = dyn_cast<VarDecl>(D);
6539               if (!VD)
6540                 return false;
6541               return VD->hasGlobalStorage() || !VD->isUsed();
6542             }))
6543           continue;
6544       }
6545       // Found multiple children - cannot get the one child only.
6546       if (Child)
6547         return nullptr;
6548       Child = S;
6549     }
6550     if (Child)
6551       Child = Child->IgnoreContainers();
6552   }
6553   return Child;
6554 }
6555 
6556 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6557     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6558     int32_t &DefaultVal) {
6559 
6560   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6561   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6562          "Expected target-based executable directive.");
6563   switch (DirectiveKind) {
6564   case OMPD_target: {
6565     const auto *CS = D.getInnermostCapturedStmt();
6566     const auto *Body =
6567         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6568     const Stmt *ChildStmt =
6569         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6570     if (const auto *NestedDir =
6571             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6572       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6573         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6574           const Expr *NumTeams =
6575               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6576           if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6577             if (auto Constant =
6578                     NumTeams->getIntegerConstantExpr(CGF.getContext()))
6579               DefaultVal = Constant->getExtValue();
6580           return NumTeams;
6581         }
6582         DefaultVal = 0;
6583         return nullptr;
6584       }
6585       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6586           isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
6587         DefaultVal = 1;
6588         return nullptr;
6589       }
6590       DefaultVal = 1;
6591       return nullptr;
6592     }
6593     // A value of -1 is used to check if we need to emit no teams region
6594     DefaultVal = -1;
6595     return nullptr;
6596   }
6597   case OMPD_target_teams:
6598   case OMPD_target_teams_distribute:
6599   case OMPD_target_teams_distribute_simd:
6600   case OMPD_target_teams_distribute_parallel_for:
6601   case OMPD_target_teams_distribute_parallel_for_simd: {
6602     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6603       const Expr *NumTeams =
6604           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6605       if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6606         if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6607           DefaultVal = Constant->getExtValue();
6608       return NumTeams;
6609     }
6610     DefaultVal = 0;
6611     return nullptr;
6612   }
6613   case OMPD_target_parallel:
6614   case OMPD_target_parallel_for:
6615   case OMPD_target_parallel_for_simd:
6616   case OMPD_target_simd:
6617     DefaultVal = 1;
6618     return nullptr;
6619   case OMPD_parallel:
6620   case OMPD_for:
6621   case OMPD_parallel_for:
6622   case OMPD_parallel_master:
6623   case OMPD_parallel_sections:
6624   case OMPD_for_simd:
6625   case OMPD_parallel_for_simd:
6626   case OMPD_cancel:
6627   case OMPD_cancellation_point:
6628   case OMPD_ordered:
6629   case OMPD_threadprivate:
6630   case OMPD_allocate:
6631   case OMPD_task:
6632   case OMPD_simd:
6633   case OMPD_tile:
6634   case OMPD_unroll:
6635   case OMPD_sections:
6636   case OMPD_section:
6637   case OMPD_single:
6638   case OMPD_master:
6639   case OMPD_critical:
6640   case OMPD_taskyield:
6641   case OMPD_barrier:
6642   case OMPD_taskwait:
6643   case OMPD_taskgroup:
6644   case OMPD_atomic:
6645   case OMPD_flush:
6646   case OMPD_depobj:
6647   case OMPD_scan:
6648   case OMPD_teams:
6649   case OMPD_target_data:
6650   case OMPD_target_exit_data:
6651   case OMPD_target_enter_data:
6652   case OMPD_distribute:
6653   case OMPD_distribute_simd:
6654   case OMPD_distribute_parallel_for:
6655   case OMPD_distribute_parallel_for_simd:
6656   case OMPD_teams_distribute:
6657   case OMPD_teams_distribute_simd:
6658   case OMPD_teams_distribute_parallel_for:
6659   case OMPD_teams_distribute_parallel_for_simd:
6660   case OMPD_target_update:
6661   case OMPD_declare_simd:
6662   case OMPD_declare_variant:
6663   case OMPD_begin_declare_variant:
6664   case OMPD_end_declare_variant:
6665   case OMPD_declare_target:
6666   case OMPD_end_declare_target:
6667   case OMPD_declare_reduction:
6668   case OMPD_declare_mapper:
6669   case OMPD_taskloop:
6670   case OMPD_taskloop_simd:
6671   case OMPD_master_taskloop:
6672   case OMPD_master_taskloop_simd:
6673   case OMPD_parallel_master_taskloop:
6674   case OMPD_parallel_master_taskloop_simd:
6675   case OMPD_requires:
6676   case OMPD_metadirective:
6677   case OMPD_unknown:
6678     break;
6679   default:
6680     break;
6681   }
6682   llvm_unreachable("Unexpected directive kind.");
6683 }
6684 
6685 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6686     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6687   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6688          "Clauses associated with the teams directive expected to be emitted "
6689          "only for the host!");
6690   CGBuilderTy &Bld = CGF.Builder;
6691   int32_t DefaultNT = -1;
6692   const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
6693   if (NumTeams != nullptr) {
6694     OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6695 
6696     switch (DirectiveKind) {
6697     case OMPD_target: {
6698       const auto *CS = D.getInnermostCapturedStmt();
6699       CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6700       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6701       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6702                                                   /*IgnoreResultAssign*/ true);
6703       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6704                              /*isSigned=*/true);
6705     }
6706     case OMPD_target_teams:
6707     case OMPD_target_teams_distribute:
6708     case OMPD_target_teams_distribute_simd:
6709     case OMPD_target_teams_distribute_parallel_for:
6710     case OMPD_target_teams_distribute_parallel_for_simd: {
6711       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6712       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6713                                                   /*IgnoreResultAssign*/ true);
6714       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6715                              /*isSigned=*/true);
6716     }
6717     default:
6718       break;
6719     }
6720   } else if (DefaultNT == -1) {
6721     return nullptr;
6722   }
6723 
6724   return Bld.getInt32(DefaultNT);
6725 }
6726 
6727 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6728                                   llvm::Value *DefaultThreadLimitVal) {
6729   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6730       CGF.getContext(), CS->getCapturedStmt());
6731   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6732     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6733       llvm::Value *NumThreads = nullptr;
6734       llvm::Value *CondVal = nullptr;
6735       // Handle if clause. If if clause present, the number of threads is
6736       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6737       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6738         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6739         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6740         const OMPIfClause *IfClause = nullptr;
6741         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6742           if (C->getNameModifier() == OMPD_unknown ||
6743               C->getNameModifier() == OMPD_parallel) {
6744             IfClause = C;
6745             break;
6746           }
6747         }
6748         if (IfClause) {
6749           const Expr *Cond = IfClause->getCondition();
6750           bool Result;
6751           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6752             if (!Result)
6753               return CGF.Builder.getInt32(1);
6754           } else {
6755             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6756             if (const auto *PreInit =
6757                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6758               for (const auto *I : PreInit->decls()) {
6759                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6760                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6761                 } else {
6762                   CodeGenFunction::AutoVarEmission Emission =
6763                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6764                   CGF.EmitAutoVarCleanups(Emission);
6765                 }
6766               }
6767             }
6768             CondVal = CGF.EvaluateExprAsBool(Cond);
6769           }
6770         }
6771       }
6772       // Check the value of num_threads clause iff if clause was not specified
6773       // or is not evaluated to false.
6774       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6775         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6776         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6777         const auto *NumThreadsClause =
6778             Dir->getSingleClause<OMPNumThreadsClause>();
6779         CodeGenFunction::LexicalScope Scope(
6780             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6781         if (const auto *PreInit =
6782                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6783           for (const auto *I : PreInit->decls()) {
6784             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6785               CGF.EmitVarDecl(cast<VarDecl>(*I));
6786             } else {
6787               CodeGenFunction::AutoVarEmission Emission =
6788                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6789               CGF.EmitAutoVarCleanups(Emission);
6790             }
6791           }
6792         }
6793         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6794         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6795                                                /*isSigned=*/false);
6796         if (DefaultThreadLimitVal)
6797           NumThreads = CGF.Builder.CreateSelect(
6798               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6799               DefaultThreadLimitVal, NumThreads);
6800       } else {
6801         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6802                                            : CGF.Builder.getInt32(0);
6803       }
6804       // Process condition of the if clause.
6805       if (CondVal) {
6806         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6807                                               CGF.Builder.getInt32(1));
6808       }
6809       return NumThreads;
6810     }
6811     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6812       return CGF.Builder.getInt32(1);
6813     return DefaultThreadLimitVal;
6814   }
6815   return DefaultThreadLimitVal ? DefaultThreadLimitVal
6816                                : CGF.Builder.getInt32(0);
6817 }
6818 
6819 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6820     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6821     int32_t &DefaultVal) {
6822   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6823   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6824          "Expected target-based executable directive.");
6825 
6826   switch (DirectiveKind) {
6827   case OMPD_target:
6828     // Teams have no clause thread_limit
6829     return nullptr;
6830   case OMPD_target_teams:
6831   case OMPD_target_teams_distribute:
6832     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6833       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6834       const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
6835       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6836         if (auto Constant =
6837                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6838           DefaultVal = Constant->getExtValue();
6839       return ThreadLimit;
6840     }
6841     return nullptr;
6842   case OMPD_target_parallel:
6843   case OMPD_target_parallel_for:
6844   case OMPD_target_parallel_for_simd:
6845   case OMPD_target_teams_distribute_parallel_for:
6846   case OMPD_target_teams_distribute_parallel_for_simd: {
6847     Expr *ThreadLimit = nullptr;
6848     Expr *NumThreads = nullptr;
6849     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6850       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6851       ThreadLimit = ThreadLimitClause->getThreadLimit();
6852       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6853         if (auto Constant =
6854                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6855           DefaultVal = Constant->getExtValue();
6856     }
6857     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6858       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6859       NumThreads = NumThreadsClause->getNumThreads();
6860       if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
6861         if (auto Constant =
6862                 NumThreads->getIntegerConstantExpr(CGF.getContext())) {
6863           if (Constant->getExtValue() < DefaultVal) {
6864             DefaultVal = Constant->getExtValue();
6865             ThreadLimit = NumThreads;
6866           }
6867         }
6868       }
6869     }
6870     return ThreadLimit;
6871   }
6872   case OMPD_target_teams_distribute_simd:
6873   case OMPD_target_simd:
6874     DefaultVal = 1;
6875     return nullptr;
6876   case OMPD_parallel:
6877   case OMPD_for:
6878   case OMPD_parallel_for:
6879   case OMPD_parallel_master:
6880   case OMPD_parallel_sections:
6881   case OMPD_for_simd:
6882   case OMPD_parallel_for_simd:
6883   case OMPD_cancel:
6884   case OMPD_cancellation_point:
6885   case OMPD_ordered:
6886   case OMPD_threadprivate:
6887   case OMPD_allocate:
6888   case OMPD_task:
6889   case OMPD_simd:
6890   case OMPD_tile:
6891   case OMPD_unroll:
6892   case OMPD_sections:
6893   case OMPD_section:
6894   case OMPD_single:
6895   case OMPD_master:
6896   case OMPD_critical:
6897   case OMPD_taskyield:
6898   case OMPD_barrier:
6899   case OMPD_taskwait:
6900   case OMPD_taskgroup:
6901   case OMPD_atomic:
6902   case OMPD_flush:
6903   case OMPD_depobj:
6904   case OMPD_scan:
6905   case OMPD_teams:
6906   case OMPD_target_data:
6907   case OMPD_target_exit_data:
6908   case OMPD_target_enter_data:
6909   case OMPD_distribute:
6910   case OMPD_distribute_simd:
6911   case OMPD_distribute_parallel_for:
6912   case OMPD_distribute_parallel_for_simd:
6913   case OMPD_teams_distribute:
6914   case OMPD_teams_distribute_simd:
6915   case OMPD_teams_distribute_parallel_for:
6916   case OMPD_teams_distribute_parallel_for_simd:
6917   case OMPD_target_update:
6918   case OMPD_declare_simd:
6919   case OMPD_declare_variant:
6920   case OMPD_begin_declare_variant:
6921   case OMPD_end_declare_variant:
6922   case OMPD_declare_target:
6923   case OMPD_end_declare_target:
6924   case OMPD_declare_reduction:
6925   case OMPD_declare_mapper:
6926   case OMPD_taskloop:
6927   case OMPD_taskloop_simd:
6928   case OMPD_master_taskloop:
6929   case OMPD_master_taskloop_simd:
6930   case OMPD_parallel_master_taskloop:
6931   case OMPD_parallel_master_taskloop_simd:
6932   case OMPD_requires:
6933   case OMPD_unknown:
6934     break;
6935   default:
6936     break;
6937   }
6938   llvm_unreachable("Unsupported directive kind.");
6939 }
6940 
6941 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
6942     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6943   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6944          "Clauses associated with the teams directive expected to be emitted "
6945          "only for the host!");
6946   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6947   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6948          "Expected target-based executable directive.");
6949   CGBuilderTy &Bld = CGF.Builder;
6950   llvm::Value *ThreadLimitVal = nullptr;
6951   llvm::Value *NumThreadsVal = nullptr;
6952   switch (DirectiveKind) {
6953   case OMPD_target: {
6954     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6955     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6956       return NumThreads;
6957     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6958         CGF.getContext(), CS->getCapturedStmt());
6959     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6960       if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
6961         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6962         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6963         const auto *ThreadLimitClause =
6964             Dir->getSingleClause<OMPThreadLimitClause>();
6965         CodeGenFunction::LexicalScope Scope(
6966             CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6967         if (const auto *PreInit =
6968                 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6969           for (const auto *I : PreInit->decls()) {
6970             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6971               CGF.EmitVarDecl(cast<VarDecl>(*I));
6972             } else {
6973               CodeGenFunction::AutoVarEmission Emission =
6974                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6975               CGF.EmitAutoVarCleanups(Emission);
6976             }
6977           }
6978         }
6979         llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6980             ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6981         ThreadLimitVal =
6982             Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6983       }
6984       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6985           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6986         CS = Dir->getInnermostCapturedStmt();
6987         const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6988             CGF.getContext(), CS->getCapturedStmt());
6989         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6990       }
6991       if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
6992           !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
6993         CS = Dir->getInnermostCapturedStmt();
6994         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6995           return NumThreads;
6996       }
6997       if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6998         return Bld.getInt32(1);
6999     }
7000     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
7001   }
7002   case OMPD_target_teams: {
7003     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7004       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7005       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7006       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7007           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7008       ThreadLimitVal =
7009           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7010     }
7011     const CapturedStmt *CS = D.getInnermostCapturedStmt();
7012     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7013       return NumThreads;
7014     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7015         CGF.getContext(), CS->getCapturedStmt());
7016     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
7017       if (Dir->getDirectiveKind() == OMPD_distribute) {
7018         CS = Dir->getInnermostCapturedStmt();
7019         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7020           return NumThreads;
7021       }
7022     }
7023     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
7024   }
7025   case OMPD_target_teams_distribute:
7026     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7027       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7028       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7029       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7030           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7031       ThreadLimitVal =
7032           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7033     }
7034     return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
7035   case OMPD_target_parallel:
7036   case OMPD_target_parallel_for:
7037   case OMPD_target_parallel_for_simd:
7038   case OMPD_target_teams_distribute_parallel_for:
7039   case OMPD_target_teams_distribute_parallel_for_simd: {
7040     llvm::Value *CondVal = nullptr;
7041     // Handle if clause. If if clause present, the number of threads is
7042     // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
7043     if (D.hasClausesOfKind<OMPIfClause>()) {
7044       const OMPIfClause *IfClause = nullptr;
7045       for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
7046         if (C->getNameModifier() == OMPD_unknown ||
7047             C->getNameModifier() == OMPD_parallel) {
7048           IfClause = C;
7049           break;
7050         }
7051       }
7052       if (IfClause) {
7053         const Expr *Cond = IfClause->getCondition();
7054         bool Result;
7055         if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
7056           if (!Result)
7057             return Bld.getInt32(1);
7058         } else {
7059           CodeGenFunction::RunCleanupsScope Scope(CGF);
7060           CondVal = CGF.EvaluateExprAsBool(Cond);
7061         }
7062       }
7063     }
7064     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7065       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7066       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7067       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7068           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7069       ThreadLimitVal =
7070           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7071     }
7072     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
7073       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
7074       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
7075       llvm::Value *NumThreads = CGF.EmitScalarExpr(
7076           NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
7077       NumThreadsVal =
7078           Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
7079       ThreadLimitVal = ThreadLimitVal
7080                            ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
7081                                                                 ThreadLimitVal),
7082                                               NumThreadsVal, ThreadLimitVal)
7083                            : NumThreadsVal;
7084     }
7085     if (!ThreadLimitVal)
7086       ThreadLimitVal = Bld.getInt32(0);
7087     if (CondVal)
7088       return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
7089     return ThreadLimitVal;
7090   }
7091   case OMPD_target_teams_distribute_simd:
7092   case OMPD_target_simd:
7093     return Bld.getInt32(1);
7094   case OMPD_parallel:
7095   case OMPD_for:
7096   case OMPD_parallel_for:
7097   case OMPD_parallel_master:
7098   case OMPD_parallel_sections:
7099   case OMPD_for_simd:
7100   case OMPD_parallel_for_simd:
7101   case OMPD_cancel:
7102   case OMPD_cancellation_point:
7103   case OMPD_ordered:
7104   case OMPD_threadprivate:
7105   case OMPD_allocate:
7106   case OMPD_task:
7107   case OMPD_simd:
7108   case OMPD_tile:
7109   case OMPD_unroll:
7110   case OMPD_sections:
7111   case OMPD_section:
7112   case OMPD_single:
7113   case OMPD_master:
7114   case OMPD_critical:
7115   case OMPD_taskyield:
7116   case OMPD_barrier:
7117   case OMPD_taskwait:
7118   case OMPD_taskgroup:
7119   case OMPD_atomic:
7120   case OMPD_flush:
7121   case OMPD_depobj:
7122   case OMPD_scan:
7123   case OMPD_teams:
7124   case OMPD_target_data:
7125   case OMPD_target_exit_data:
7126   case OMPD_target_enter_data:
7127   case OMPD_distribute:
7128   case OMPD_distribute_simd:
7129   case OMPD_distribute_parallel_for:
7130   case OMPD_distribute_parallel_for_simd:
7131   case OMPD_teams_distribute:
7132   case OMPD_teams_distribute_simd:
7133   case OMPD_teams_distribute_parallel_for:
7134   case OMPD_teams_distribute_parallel_for_simd:
7135   case OMPD_target_update:
7136   case OMPD_declare_simd:
7137   case OMPD_declare_variant:
7138   case OMPD_begin_declare_variant:
7139   case OMPD_end_declare_variant:
7140   case OMPD_declare_target:
7141   case OMPD_end_declare_target:
7142   case OMPD_declare_reduction:
7143   case OMPD_declare_mapper:
7144   case OMPD_taskloop:
7145   case OMPD_taskloop_simd:
7146   case OMPD_master_taskloop:
7147   case OMPD_master_taskloop_simd:
7148   case OMPD_parallel_master_taskloop:
7149   case OMPD_parallel_master_taskloop_simd:
7150   case OMPD_requires:
7151   case OMPD_metadirective:
7152   case OMPD_unknown:
7153     break;
7154   default:
7155     break;
7156   }
7157   llvm_unreachable("Unsupported directive kind.");
7158 }
7159 
7160 namespace {
7161 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7162 
7163 // Utility to handle information from clauses associated with a given
7164 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7165 // It provides a convenient interface to obtain the information and generate
7166 // code for that information.
7167 class MappableExprsHandler {
7168 public:
7169   /// Values for bit flags used to specify the mapping type for
7170   /// offloading.
7171   enum OpenMPOffloadMappingFlags : uint64_t {
7172     /// No flags
7173     OMP_MAP_NONE = 0x0,
7174     /// Allocate memory on the device and move data from host to device.
7175     OMP_MAP_TO = 0x01,
7176     /// Allocate memory on the device and move data from device to host.
7177     OMP_MAP_FROM = 0x02,
7178     /// Always perform the requested mapping action on the element, even
7179     /// if it was already mapped before.
7180     OMP_MAP_ALWAYS = 0x04,
7181     /// Delete the element from the device environment, ignoring the
7182     /// current reference count associated with the element.
7183     OMP_MAP_DELETE = 0x08,
7184     /// The element being mapped is a pointer-pointee pair; both the
7185     /// pointer and the pointee should be mapped.
7186     OMP_MAP_PTR_AND_OBJ = 0x10,
7187     /// This flags signals that the base address of an entry should be
7188     /// passed to the target kernel as an argument.
7189     OMP_MAP_TARGET_PARAM = 0x20,
7190     /// Signal that the runtime library has to return the device pointer
7191     /// in the current position for the data being mapped. Used when we have the
7192     /// use_device_ptr or use_device_addr clause.
7193     OMP_MAP_RETURN_PARAM = 0x40,
7194     /// This flag signals that the reference being passed is a pointer to
7195     /// private data.
7196     OMP_MAP_PRIVATE = 0x80,
7197     /// Pass the element to the device by value.
7198     OMP_MAP_LITERAL = 0x100,
7199     /// Implicit map
7200     OMP_MAP_IMPLICIT = 0x200,
7201     /// Close is a hint to the runtime to allocate memory close to
7202     /// the target device.
7203     OMP_MAP_CLOSE = 0x400,
7204     /// 0x800 is reserved for compatibility with XLC.
7205     /// Produce a runtime error if the data is not already allocated.
7206     OMP_MAP_PRESENT = 0x1000,
7207     // Increment and decrement a separate reference counter so that the data
7208     // cannot be unmapped within the associated region.  Thus, this flag is
7209     // intended to be used on 'target' and 'target data' directives because they
7210     // are inherently structured.  It is not intended to be used on 'target
7211     // enter data' and 'target exit data' directives because they are inherently
7212     // dynamic.
7213     // This is an OpenMP extension for the sake of OpenACC support.
7214     OMP_MAP_OMPX_HOLD = 0x2000,
7215     /// Signal that the runtime library should use args as an array of
7216     /// descriptor_dim pointers and use args_size as dims. Used when we have
7217     /// non-contiguous list items in target update directive
7218     OMP_MAP_NON_CONTIG = 0x100000000000,
7219     /// The 16 MSBs of the flags indicate whether the entry is member of some
7220     /// struct/class.
7221     OMP_MAP_MEMBER_OF = 0xffff000000000000,
7222     LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
7223   };
7224 
7225   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7226   static unsigned getFlagMemberOffset() {
7227     unsigned Offset = 0;
7228     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7229          Remain = Remain >> 1)
7230       Offset++;
7231     return Offset;
7232   }
7233 
7234   /// Class that holds debugging information for a data mapping to be passed to
7235   /// the runtime library.
7236   class MappingExprInfo {
7237     /// The variable declaration used for the data mapping.
7238     const ValueDecl *MapDecl = nullptr;
7239     /// The original expression used in the map clause, or null if there is
7240     /// none.
7241     const Expr *MapExpr = nullptr;
7242 
7243   public:
7244     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7245         : MapDecl(MapDecl), MapExpr(MapExpr) {}
7246 
7247     const ValueDecl *getMapDecl() const { return MapDecl; }
7248     const Expr *getMapExpr() const { return MapExpr; }
7249   };
7250 
7251   /// Class that associates information with a base pointer to be passed to the
7252   /// runtime library.
7253   class BasePointerInfo {
7254     /// The base pointer.
7255     llvm::Value *Ptr = nullptr;
7256     /// The base declaration that refers to this device pointer, or null if
7257     /// there is none.
7258     const ValueDecl *DevPtrDecl = nullptr;
7259 
7260   public:
7261     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7262         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7263     llvm::Value *operator*() const { return Ptr; }
7264     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7265     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7266   };
7267 
7268   using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
7269   using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7270   using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7271   using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7272   using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
7273   using MapDimArrayTy = SmallVector<uint64_t, 4>;
7274   using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7275 
7276   /// This structure contains combined information generated for mappable
7277   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7278   /// mappers, and non-contiguous information.
7279   struct MapCombinedInfoTy {
7280     struct StructNonContiguousInfo {
7281       bool IsNonContiguous = false;
7282       MapDimArrayTy Dims;
7283       MapNonContiguousArrayTy Offsets;
7284       MapNonContiguousArrayTy Counts;
7285       MapNonContiguousArrayTy Strides;
7286     };
7287     MapExprsArrayTy Exprs;
7288     MapBaseValuesArrayTy BasePointers;
7289     MapValuesArrayTy Pointers;
7290     MapValuesArrayTy Sizes;
7291     MapFlagsArrayTy Types;
7292     MapMappersArrayTy Mappers;
7293     StructNonContiguousInfo NonContigInfo;
7294 
7295     /// Append arrays in \a CurInfo.
7296     void append(MapCombinedInfoTy &CurInfo) {
7297       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7298       BasePointers.append(CurInfo.BasePointers.begin(),
7299                           CurInfo.BasePointers.end());
7300       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7301       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7302       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7303       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7304       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7305                                  CurInfo.NonContigInfo.Dims.end());
7306       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7307                                     CurInfo.NonContigInfo.Offsets.end());
7308       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7309                                    CurInfo.NonContigInfo.Counts.end());
7310       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7311                                     CurInfo.NonContigInfo.Strides.end());
7312     }
7313   };
7314 
7315   /// Map between a struct and the its lowest & highest elements which have been
7316   /// mapped.
7317   /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
7318   ///                    HE(FieldIndex, Pointer)}
7319   struct StructRangeInfoTy {
7320     MapCombinedInfoTy PreliminaryMapData;
7321     std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
7322         0, Address::invalid()};
7323     std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
7324         0, Address::invalid()};
7325     Address Base = Address::invalid();
7326     Address LB = Address::invalid();
7327     bool IsArraySection = false;
7328     bool HasCompleteRecord = false;
7329   };
7330 
7331 private:
7332   /// Kind that defines how a device pointer has to be returned.
7333   struct MapInfo {
7334     OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
7335     OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
7336     ArrayRef<OpenMPMapModifierKind> MapModifiers;
7337     ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
7338     bool ReturnDevicePointer = false;
7339     bool IsImplicit = false;
7340     const ValueDecl *Mapper = nullptr;
7341     const Expr *VarRef = nullptr;
7342     bool ForDeviceAddr = false;
7343 
7344     MapInfo() = default;
7345     MapInfo(
7346         OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7347         OpenMPMapClauseKind MapType,
7348         ArrayRef<OpenMPMapModifierKind> MapModifiers,
7349         ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7350         bool ReturnDevicePointer, bool IsImplicit,
7351         const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
7352         bool ForDeviceAddr = false)
7353         : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
7354           MotionModifiers(MotionModifiers),
7355           ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
7356           Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
7357   };
7358 
7359   /// If use_device_ptr or use_device_addr is used on a decl which is a struct
7360   /// member and there is no map information about it, then emission of that
7361   /// entry is deferred until the whole struct has been processed.
7362   struct DeferredDevicePtrEntryTy {
7363     const Expr *IE = nullptr;
7364     const ValueDecl *VD = nullptr;
7365     bool ForDeviceAddr = false;
7366 
7367     DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
7368                              bool ForDeviceAddr)
7369         : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
7370   };
7371 
7372   /// The target directive from where the mappable clauses were extracted. It
7373   /// is either a executable directive or a user-defined mapper directive.
7374   llvm::PointerUnion<const OMPExecutableDirective *,
7375                      const OMPDeclareMapperDecl *>
7376       CurDir;
7377 
7378   /// Function the directive is being generated for.
7379   CodeGenFunction &CGF;
7380 
7381   /// Set of all first private variables in the current directive.
7382   /// bool data is set to true if the variable is implicitly marked as
7383   /// firstprivate, false otherwise.
7384   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7385 
7386   /// Map between device pointer declarations and their expression components.
7387   /// The key value for declarations in 'this' is null.
7388   llvm::DenseMap<
7389       const ValueDecl *,
7390       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7391       DevPointersMap;
7392 
7393   /// Map between lambda declarations and their map type.
7394   llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7395 
7396   llvm::Value *getExprTypeSize(const Expr *E) const {
7397     QualType ExprTy = E->getType().getCanonicalType();
7398 
7399     // Calculate the size for array shaping expression.
7400     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7401       llvm::Value *Size =
7402           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7403       for (const Expr *SE : OAE->getDimensions()) {
7404         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7405         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7406                                       CGF.getContext().getSizeType(),
7407                                       SE->getExprLoc());
7408         Size = CGF.Builder.CreateNUWMul(Size, Sz);
7409       }
7410       return Size;
7411     }
7412 
7413     // Reference types are ignored for mapping purposes.
7414     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7415       ExprTy = RefTy->getPointeeType().getCanonicalType();
7416 
7417     // Given that an array section is considered a built-in type, we need to
7418     // do the calculation based on the length of the section instead of relying
7419     // on CGF.getTypeSize(E->getType()).
7420     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7421       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7422                             OAE->getBase()->IgnoreParenImpCasts())
7423                             .getCanonicalType();
7424 
7425       // If there is no length associated with the expression and lower bound is
7426       // not specified too, that means we are using the whole length of the
7427       // base.
7428       if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7429           !OAE->getLowerBound())
7430         return CGF.getTypeSize(BaseTy);
7431 
7432       llvm::Value *ElemSize;
7433       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7434         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7435       } else {
7436         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7437         assert(ATy && "Expecting array type if not a pointer type.");
7438         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7439       }
7440 
7441       // If we don't have a length at this point, that is because we have an
7442       // array section with a single element.
7443       if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7444         return ElemSize;
7445 
7446       if (const Expr *LenExpr = OAE->getLength()) {
7447         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7448         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7449                                              CGF.getContext().getSizeType(),
7450                                              LenExpr->getExprLoc());
7451         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7452       }
7453       assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7454              OAE->getLowerBound() && "expected array_section[lb:].");
7455       // Size = sizetype - lb * elemtype;
7456       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7457       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7458       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7459                                        CGF.getContext().getSizeType(),
7460                                        OAE->getLowerBound()->getExprLoc());
7461       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7462       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7463       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7464       LengthVal = CGF.Builder.CreateSelect(
7465           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7466       return LengthVal;
7467     }
7468     return CGF.getTypeSize(ExprTy);
7469   }
7470 
7471   /// Return the corresponding bits for a given map clause modifier. Add
7472   /// a flag marking the map as a pointer if requested. Add a flag marking the
7473   /// map as the first one of a series of maps that relate to the same map
7474   /// expression.
7475   OpenMPOffloadMappingFlags getMapTypeBits(
7476       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7477       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7478       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7479     OpenMPOffloadMappingFlags Bits =
7480         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7481     switch (MapType) {
7482     case OMPC_MAP_alloc:
7483     case OMPC_MAP_release:
7484       // alloc and release is the default behavior in the runtime library,  i.e.
7485       // if we don't pass any bits alloc/release that is what the runtime is
7486       // going to do. Therefore, we don't need to signal anything for these two
7487       // type modifiers.
7488       break;
7489     case OMPC_MAP_to:
7490       Bits |= OMP_MAP_TO;
7491       break;
7492     case OMPC_MAP_from:
7493       Bits |= OMP_MAP_FROM;
7494       break;
7495     case OMPC_MAP_tofrom:
7496       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7497       break;
7498     case OMPC_MAP_delete:
7499       Bits |= OMP_MAP_DELETE;
7500       break;
7501     case OMPC_MAP_unknown:
7502       llvm_unreachable("Unexpected map type!");
7503     }
7504     if (AddPtrFlag)
7505       Bits |= OMP_MAP_PTR_AND_OBJ;
7506     if (AddIsTargetParamFlag)
7507       Bits |= OMP_MAP_TARGET_PARAM;
7508     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7509       Bits |= OMP_MAP_ALWAYS;
7510     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7511       Bits |= OMP_MAP_CLOSE;
7512     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7513         llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7514       Bits |= OMP_MAP_PRESENT;
7515     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7516       Bits |= OMP_MAP_OMPX_HOLD;
7517     if (IsNonContiguous)
7518       Bits |= OMP_MAP_NON_CONTIG;
7519     return Bits;
7520   }
7521 
7522   /// Return true if the provided expression is a final array section. A
7523   /// final array section, is one whose length can't be proved to be one.
7524   bool isFinalArraySectionExpression(const Expr *E) const {
7525     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7526 
7527     // It is not an array section and therefore not a unity-size one.
7528     if (!OASE)
7529       return false;
7530 
7531     // An array section with no colon always refer to a single element.
7532     if (OASE->getColonLocFirst().isInvalid())
7533       return false;
7534 
7535     const Expr *Length = OASE->getLength();
7536 
7537     // If we don't have a length we have to check if the array has size 1
7538     // for this dimension. Also, we should always expect a length if the
7539     // base type is pointer.
7540     if (!Length) {
7541       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7542                              OASE->getBase()->IgnoreParenImpCasts())
7543                              .getCanonicalType();
7544       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7545         return ATy->getSize().getSExtValue() != 1;
7546       // If we don't have a constant dimension length, we have to consider
7547       // the current section as having any size, so it is not necessarily
7548       // unitary. If it happen to be unity size, that's user fault.
7549       return true;
7550     }
7551 
7552     // Check if the length evaluates to 1.
7553     Expr::EvalResult Result;
7554     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7555       return true; // Can have more that size 1.
7556 
7557     llvm::APSInt ConstLength = Result.Val.getInt();
7558     return ConstLength.getSExtValue() != 1;
7559   }
7560 
7561   /// Generate the base pointers, section pointers, sizes, map type bits, and
7562   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7563   /// map type, map or motion modifiers, and expression components.
7564   /// \a IsFirstComponent should be set to true if the provided set of
7565   /// components is the first associated with a capture.
7566   void generateInfoForComponentList(
7567       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7568       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7569       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7570       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7571       bool IsFirstComponentList, bool IsImplicit,
7572       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7573       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7574       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7575           OverlappedElements = llvm::None) const {
7576     // The following summarizes what has to be generated for each map and the
7577     // types below. The generated information is expressed in this order:
7578     // base pointer, section pointer, size, flags
7579     // (to add to the ones that come from the map type and modifier).
7580     //
7581     // double d;
7582     // int i[100];
7583     // float *p;
7584     //
7585     // struct S1 {
7586     //   int i;
7587     //   float f[50];
7588     // }
7589     // struct S2 {
7590     //   int i;
7591     //   float f[50];
7592     //   S1 s;
7593     //   double *p;
7594     //   struct S2 *ps;
7595     //   int &ref;
7596     // }
7597     // S2 s;
7598     // S2 *ps;
7599     //
7600     // map(d)
7601     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7602     //
7603     // map(i)
7604     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7605     //
7606     // map(i[1:23])
7607     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7608     //
7609     // map(p)
7610     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7611     //
7612     // map(p[1:24])
7613     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7614     // in unified shared memory mode or for local pointers
7615     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7616     //
7617     // map(s)
7618     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7619     //
7620     // map(s.i)
7621     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7622     //
7623     // map(s.s.f)
7624     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7625     //
7626     // map(s.p)
7627     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7628     //
7629     // map(to: s.p[:22])
7630     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7631     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7632     // &(s.p), &(s.p[0]), 22*sizeof(double),
7633     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7634     // (*) alloc space for struct members, only this is a target parameter
7635     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7636     //      optimizes this entry out, same in the examples below)
7637     // (***) map the pointee (map: to)
7638     //
7639     // map(to: s.ref)
7640     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7641     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7642     // (*) alloc space for struct members, only this is a target parameter
7643     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7644     //      optimizes this entry out, same in the examples below)
7645     // (***) map the pointee (map: to)
7646     //
7647     // map(s.ps)
7648     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7649     //
7650     // map(from: s.ps->s.i)
7651     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7652     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7653     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7654     //
7655     // map(to: s.ps->ps)
7656     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7657     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7658     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7659     //
7660     // map(s.ps->ps->ps)
7661     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7662     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7663     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7664     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7665     //
7666     // map(to: s.ps->ps->s.f[:22])
7667     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7668     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7669     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7670     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7671     //
7672     // map(ps)
7673     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7674     //
7675     // map(ps->i)
7676     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7677     //
7678     // map(ps->s.f)
7679     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7680     //
7681     // map(from: ps->p)
7682     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7683     //
7684     // map(to: ps->p[:22])
7685     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7686     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7687     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7688     //
7689     // map(ps->ps)
7690     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7691     //
7692     // map(from: ps->ps->s.i)
7693     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7694     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7695     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7696     //
7697     // map(from: ps->ps->ps)
7698     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7699     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7700     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7701     //
7702     // map(ps->ps->ps->ps)
7703     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7704     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7705     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7706     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7707     //
7708     // map(to: ps->ps->ps->s.f[:22])
7709     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7710     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7711     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7712     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7713     //
7714     // map(to: s.f[:22]) map(from: s.p[:33])
7715     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7716     //     sizeof(double*) (**), TARGET_PARAM
7717     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7718     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7719     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7720     // (*) allocate contiguous space needed to fit all mapped members even if
7721     //     we allocate space for members not mapped (in this example,
7722     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7723     //     them as well because they fall between &s.f[0] and &s.p)
7724     //
7725     // map(from: s.f[:22]) map(to: ps->p[:33])
7726     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7727     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7728     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7729     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7730     // (*) the struct this entry pertains to is the 2nd element in the list of
7731     //     arguments, hence MEMBER_OF(2)
7732     //
7733     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7734     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7735     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7736     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7737     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7738     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7739     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7740     // (*) the struct this entry pertains to is the 4th element in the list
7741     //     of arguments, hence MEMBER_OF(4)
7742 
7743     // Track if the map information being generated is the first for a capture.
7744     bool IsCaptureFirstInfo = IsFirstComponentList;
7745     // When the variable is on a declare target link or in a to clause with
7746     // unified memory, a reference is needed to hold the host/device address
7747     // of the variable.
7748     bool RequiresReference = false;
7749 
7750     // Scan the components from the base to the complete expression.
7751     auto CI = Components.rbegin();
7752     auto CE = Components.rend();
7753     auto I = CI;
7754 
7755     // Track if the map information being generated is the first for a list of
7756     // components.
7757     bool IsExpressionFirstInfo = true;
7758     bool FirstPointerInComplexData = false;
7759     Address BP = Address::invalid();
7760     const Expr *AssocExpr = I->getAssociatedExpression();
7761     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7762     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7763     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7764 
7765     if (isa<MemberExpr>(AssocExpr)) {
7766       // The base is the 'this' pointer. The content of the pointer is going
7767       // to be the base of the field being mapped.
7768       BP = CGF.LoadCXXThisAddress();
7769     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7770                (OASE &&
7771                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7772       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7773     } else if (OAShE &&
7774                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7775       BP = Address(
7776           CGF.EmitScalarExpr(OAShE->getBase()),
7777           CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7778           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7779     } else {
7780       // The base is the reference to the variable.
7781       // BP = &Var.
7782       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7783       if (const auto *VD =
7784               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7785         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7786                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7787           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7788               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7789                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7790             RequiresReference = true;
7791             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7792           }
7793         }
7794       }
7795 
7796       // If the variable is a pointer and is being dereferenced (i.e. is not
7797       // the last component), the base has to be the pointer itself, not its
7798       // reference. References are ignored for mapping purposes.
7799       QualType Ty =
7800           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7801       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7802         // No need to generate individual map information for the pointer, it
7803         // can be associated with the combined storage if shared memory mode is
7804         // active or the base declaration is not global variable.
7805         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7806         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7807             !VD || VD->hasLocalStorage())
7808           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7809         else
7810           FirstPointerInComplexData = true;
7811         ++I;
7812       }
7813     }
7814 
7815     // Track whether a component of the list should be marked as MEMBER_OF some
7816     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7817     // in a component list should be marked as MEMBER_OF, all subsequent entries
7818     // do not belong to the base struct. E.g.
7819     // struct S2 s;
7820     // s.ps->ps->ps->f[:]
7821     //   (1) (2) (3) (4)
7822     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7823     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7824     // is the pointee of ps(2) which is not member of struct s, so it should not
7825     // be marked as such (it is still PTR_AND_OBJ).
7826     // The variable is initialized to false so that PTR_AND_OBJ entries which
7827     // are not struct members are not considered (e.g. array of pointers to
7828     // data).
7829     bool ShouldBeMemberOf = false;
7830 
7831     // Variable keeping track of whether or not we have encountered a component
7832     // in the component list which is a member expression. Useful when we have a
7833     // pointer or a final array section, in which case it is the previous
7834     // component in the list which tells us whether we have a member expression.
7835     // E.g. X.f[:]
7836     // While processing the final array section "[:]" it is "f" which tells us
7837     // whether we are dealing with a member of a declared struct.
7838     const MemberExpr *EncounteredME = nullptr;
7839 
7840     // Track for the total number of dimension. Start from one for the dummy
7841     // dimension.
7842     uint64_t DimSize = 1;
7843 
7844     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7845     bool IsPrevMemberReference = false;
7846 
7847     for (; I != CE; ++I) {
7848       // If the current component is member of a struct (parent struct) mark it.
7849       if (!EncounteredME) {
7850         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7851         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7852         // as MEMBER_OF the parent struct.
7853         if (EncounteredME) {
7854           ShouldBeMemberOf = true;
7855           // Do not emit as complex pointer if this is actually not array-like
7856           // expression.
7857           if (FirstPointerInComplexData) {
7858             QualType Ty = std::prev(I)
7859                               ->getAssociatedDeclaration()
7860                               ->getType()
7861                               .getNonReferenceType();
7862             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7863             FirstPointerInComplexData = false;
7864           }
7865         }
7866       }
7867 
7868       auto Next = std::next(I);
7869 
7870       // We need to generate the addresses and sizes if this is the last
7871       // component, if the component is a pointer or if it is an array section
7872       // whose length can't be proved to be one. If this is a pointer, it
7873       // becomes the base address for the following components.
7874 
7875       // A final array section, is one whose length can't be proved to be one.
7876       // If the map item is non-contiguous then we don't treat any array section
7877       // as final array section.
7878       bool IsFinalArraySection =
7879           !IsNonContiguous &&
7880           isFinalArraySectionExpression(I->getAssociatedExpression());
7881 
7882       // If we have a declaration for the mapping use that, otherwise use
7883       // the base declaration of the map clause.
7884       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7885                                      ? I->getAssociatedDeclaration()
7886                                      : BaseDecl;
7887       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7888                                                : MapExpr;
7889 
7890       // Get information on whether the element is a pointer. Have to do a
7891       // special treatment for array sections given that they are built-in
7892       // types.
7893       const auto *OASE =
7894           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7895       const auto *OAShE =
7896           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7897       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7898       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7899       bool IsPointer =
7900           OAShE ||
7901           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7902                        .getCanonicalType()
7903                        ->isAnyPointerType()) ||
7904           I->getAssociatedExpression()->getType()->isAnyPointerType();
7905       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7906                                MapDecl &&
7907                                MapDecl->getType()->isLValueReferenceType();
7908       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
7909 
7910       if (OASE)
7911         ++DimSize;
7912 
7913       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7914           IsFinalArraySection) {
7915         // If this is not the last component, we expect the pointer to be
7916         // associated with an array expression or member expression.
7917         assert((Next == CE ||
7918                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7919                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7920                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7921                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7922                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7923                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7924                "Unexpected expression");
7925 
7926         Address LB = Address::invalid();
7927         Address LowestElem = Address::invalid();
7928         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7929                                        const MemberExpr *E) {
7930           const Expr *BaseExpr = E->getBase();
7931           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
7932           // scalar.
7933           LValue BaseLV;
7934           if (E->isArrow()) {
7935             LValueBaseInfo BaseInfo;
7936             TBAAAccessInfo TBAAInfo;
7937             Address Addr =
7938                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7939             QualType PtrTy = BaseExpr->getType()->getPointeeType();
7940             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7941           } else {
7942             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7943           }
7944           return BaseLV;
7945         };
7946         if (OAShE) {
7947           LowestElem = LB =
7948               Address(CGF.EmitScalarExpr(OAShE->getBase()),
7949                       CGF.ConvertTypeForMem(
7950                           OAShE->getBase()->getType()->getPointeeType()),
7951                       CGF.getContext().getTypeAlignInChars(
7952                           OAShE->getBase()->getType()));
7953         } else if (IsMemberReference) {
7954           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7955           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7956           LowestElem = CGF.EmitLValueForFieldInitialization(
7957                               BaseLVal, cast<FieldDecl>(MapDecl))
7958                            .getAddress(CGF);
7959           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7960                    .getAddress(CGF);
7961         } else {
7962           LowestElem = LB =
7963               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7964                   .getAddress(CGF);
7965         }
7966 
7967         // If this component is a pointer inside the base struct then we don't
7968         // need to create any entry for it - it will be combined with the object
7969         // it is pointing to into a single PTR_AND_OBJ entry.
7970         bool IsMemberPointerOrAddr =
7971             EncounteredME &&
7972             (((IsPointer || ForDeviceAddr) &&
7973               I->getAssociatedExpression() == EncounteredME) ||
7974              (IsPrevMemberReference && !IsPointer) ||
7975              (IsMemberReference && Next != CE &&
7976               !Next->getAssociatedExpression()->getType()->isPointerType()));
7977         if (!OverlappedElements.empty() && Next == CE) {
7978           // Handle base element with the info for overlapped elements.
7979           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7980           assert(!IsPointer &&
7981                  "Unexpected base element with the pointer type.");
7982           // Mark the whole struct as the struct that requires allocation on the
7983           // device.
7984           PartialStruct.LowestElem = {0, LowestElem};
7985           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7986               I->getAssociatedExpression()->getType());
7987           Address HB = CGF.Builder.CreateConstGEP(
7988               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7989                   LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
7990               TypeSize.getQuantity() - 1);
7991           PartialStruct.HighestElem = {
7992               std::numeric_limits<decltype(
7993                   PartialStruct.HighestElem.first)>::max(),
7994               HB};
7995           PartialStruct.Base = BP;
7996           PartialStruct.LB = LB;
7997           assert(
7998               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7999               "Overlapped elements must be used only once for the variable.");
8000           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8001           // Emit data for non-overlapped data.
8002           OpenMPOffloadMappingFlags Flags =
8003               OMP_MAP_MEMBER_OF |
8004               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8005                              /*AddPtrFlag=*/false,
8006                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8007           llvm::Value *Size = nullptr;
8008           // Do bitcopy of all non-overlapped structure elements.
8009           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8010                    Component : OverlappedElements) {
8011             Address ComponentLB = Address::invalid();
8012             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8013                  Component) {
8014               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8015                 const auto *FD = dyn_cast<FieldDecl>(VD);
8016                 if (FD && FD->getType()->isLValueReferenceType()) {
8017                   const auto *ME =
8018                       cast<MemberExpr>(MC.getAssociatedExpression());
8019                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8020                   ComponentLB =
8021                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
8022                           .getAddress(CGF);
8023                 } else {
8024                   ComponentLB =
8025                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
8026                           .getAddress(CGF);
8027                 }
8028                 Size = CGF.Builder.CreatePtrDiff(
8029                     CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
8030                     CGF.EmitCastToVoidPtr(LB.getPointer()));
8031                 break;
8032               }
8033             }
8034             assert(Size && "Failed to determine structure size");
8035             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8036             CombinedInfo.BasePointers.push_back(BP.getPointer());
8037             CombinedInfo.Pointers.push_back(LB.getPointer());
8038             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8039                 Size, CGF.Int64Ty, /*isSigned=*/true));
8040             CombinedInfo.Types.push_back(Flags);
8041             CombinedInfo.Mappers.push_back(nullptr);
8042             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8043                                                                       : 1);
8044             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
8045           }
8046           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8047           CombinedInfo.BasePointers.push_back(BP.getPointer());
8048           CombinedInfo.Pointers.push_back(LB.getPointer());
8049           Size = CGF.Builder.CreatePtrDiff(
8050               CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
8051               CGF.EmitCastToVoidPtr(LB.getPointer()));
8052           CombinedInfo.Sizes.push_back(
8053               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8054           CombinedInfo.Types.push_back(Flags);
8055           CombinedInfo.Mappers.push_back(nullptr);
8056           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8057                                                                     : 1);
8058           break;
8059         }
8060         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8061         if (!IsMemberPointerOrAddr ||
8062             (Next == CE && MapType != OMPC_MAP_unknown)) {
8063           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8064           CombinedInfo.BasePointers.push_back(BP.getPointer());
8065           CombinedInfo.Pointers.push_back(LB.getPointer());
8066           CombinedInfo.Sizes.push_back(
8067               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8068           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8069                                                                     : 1);
8070 
8071           // If Mapper is valid, the last component inherits the mapper.
8072           bool HasMapper = Mapper && Next == CE;
8073           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8074 
8075           // We need to add a pointer flag for each map that comes from the
8076           // same expression except for the first one. We also need to signal
8077           // this map is the first one that relates with the current capture
8078           // (there is a set of entries for each capture).
8079           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8080               MapType, MapModifiers, MotionModifiers, IsImplicit,
8081               !IsExpressionFirstInfo || RequiresReference ||
8082                   FirstPointerInComplexData || IsMemberReference,
8083               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8084 
8085           if (!IsExpressionFirstInfo || IsMemberReference) {
8086             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8087             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8088             if (IsPointer || (IsMemberReference && Next != CE))
8089               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
8090                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
8091 
8092             if (ShouldBeMemberOf) {
8093               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8094               // should be later updated with the correct value of MEMBER_OF.
8095               Flags |= OMP_MAP_MEMBER_OF;
8096               // From now on, all subsequent PTR_AND_OBJ entries should not be
8097               // marked as MEMBER_OF.
8098               ShouldBeMemberOf = false;
8099             }
8100           }
8101 
8102           CombinedInfo.Types.push_back(Flags);
8103         }
8104 
8105         // If we have encountered a member expression so far, keep track of the
8106         // mapped member. If the parent is "*this", then the value declaration
8107         // is nullptr.
8108         if (EncounteredME) {
8109           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8110           unsigned FieldIndex = FD->getFieldIndex();
8111 
8112           // Update info about the lowest and highest elements for this struct
8113           if (!PartialStruct.Base.isValid()) {
8114             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8115             if (IsFinalArraySection) {
8116               Address HB =
8117                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
8118                       .getAddress(CGF);
8119               PartialStruct.HighestElem = {FieldIndex, HB};
8120             } else {
8121               PartialStruct.HighestElem = {FieldIndex, LowestElem};
8122             }
8123             PartialStruct.Base = BP;
8124             PartialStruct.LB = BP;
8125           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8126             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8127           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8128             PartialStruct.HighestElem = {FieldIndex, LowestElem};
8129           }
8130         }
8131 
8132         // Need to emit combined struct for array sections.
8133         if (IsFinalArraySection || IsNonContiguous)
8134           PartialStruct.IsArraySection = true;
8135 
8136         // If we have a final array section, we are done with this expression.
8137         if (IsFinalArraySection)
8138           break;
8139 
8140         // The pointer becomes the base for the next element.
8141         if (Next != CE)
8142           BP = IsMemberReference ? LowestElem : LB;
8143 
8144         IsExpressionFirstInfo = false;
8145         IsCaptureFirstInfo = false;
8146         FirstPointerInComplexData = false;
8147         IsPrevMemberReference = IsMemberReference;
8148       } else if (FirstPointerInComplexData) {
8149         QualType Ty = Components.rbegin()
8150                           ->getAssociatedDeclaration()
8151                           ->getType()
8152                           .getNonReferenceType();
8153         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8154         FirstPointerInComplexData = false;
8155       }
8156     }
8157     // If ran into the whole component - allocate the space for the whole
8158     // record.
8159     if (!EncounteredME)
8160       PartialStruct.HasCompleteRecord = true;
8161 
8162     if (!IsNonContiguous)
8163       return;
8164 
8165     const ASTContext &Context = CGF.getContext();
8166 
8167     // For supporting stride in array section, we need to initialize the first
8168     // dimension size as 1, first offset as 0, and first count as 1
8169     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8170     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8171     MapValuesArrayTy CurStrides;
8172     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8173     uint64_t ElementTypeSize;
8174 
8175     // Collect Size information for each dimension and get the element size as
8176     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
8177     // should be [10, 10] and the first stride is 4 btyes.
8178     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8179          Components) {
8180       const Expr *AssocExpr = Component.getAssociatedExpression();
8181       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8182 
8183       if (!OASE)
8184         continue;
8185 
8186       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8187       auto *CAT = Context.getAsConstantArrayType(Ty);
8188       auto *VAT = Context.getAsVariableArrayType(Ty);
8189 
8190       // We need all the dimension size except for the last dimension.
8191       assert((VAT || CAT || &Component == &*Components.begin()) &&
8192              "Should be either ConstantArray or VariableArray if not the "
8193              "first Component");
8194 
8195       // Get element size if CurStrides is empty.
8196       if (CurStrides.empty()) {
8197         const Type *ElementType = nullptr;
8198         if (CAT)
8199           ElementType = CAT->getElementType().getTypePtr();
8200         else if (VAT)
8201           ElementType = VAT->getElementType().getTypePtr();
8202         else
8203           assert(&Component == &*Components.begin() &&
8204                  "Only expect pointer (non CAT or VAT) when this is the "
8205                  "first Component");
8206         // If ElementType is null, then it means the base is a pointer
8207         // (neither CAT nor VAT) and we'll attempt to get ElementType again
8208         // for next iteration.
8209         if (ElementType) {
8210           // For the case that having pointer as base, we need to remove one
8211           // level of indirection.
8212           if (&Component != &*Components.begin())
8213             ElementType = ElementType->getPointeeOrArrayElementType();
8214           ElementTypeSize =
8215               Context.getTypeSizeInChars(ElementType).getQuantity();
8216           CurStrides.push_back(
8217               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8218         }
8219       }
8220       // Get dimension value except for the last dimension since we don't need
8221       // it.
8222       if (DimSizes.size() < Components.size() - 1) {
8223         if (CAT)
8224           DimSizes.push_back(llvm::ConstantInt::get(
8225               CGF.Int64Ty, CAT->getSize().getZExtValue()));
8226         else if (VAT)
8227           DimSizes.push_back(CGF.Builder.CreateIntCast(
8228               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8229               /*IsSigned=*/false));
8230       }
8231     }
8232 
8233     // Skip the dummy dimension since we have already have its information.
8234     auto *DI = DimSizes.begin() + 1;
8235     // Product of dimension.
8236     llvm::Value *DimProd =
8237         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8238 
8239     // Collect info for non-contiguous. Notice that offset, count, and stride
8240     // are only meaningful for array-section, so we insert a null for anything
8241     // other than array-section.
8242     // Also, the size of offset, count, and stride are not the same as
8243     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8244     // count, and stride are the same as the number of non-contiguous
8245     // declaration in target update to/from clause.
8246     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8247          Components) {
8248       const Expr *AssocExpr = Component.getAssociatedExpression();
8249 
8250       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8251         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8252             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8253             /*isSigned=*/false);
8254         CurOffsets.push_back(Offset);
8255         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8256         CurStrides.push_back(CurStrides.back());
8257         continue;
8258       }
8259 
8260       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8261 
8262       if (!OASE)
8263         continue;
8264 
8265       // Offset
8266       const Expr *OffsetExpr = OASE->getLowerBound();
8267       llvm::Value *Offset = nullptr;
8268       if (!OffsetExpr) {
8269         // If offset is absent, then we just set it to zero.
8270         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8271       } else {
8272         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8273                                            CGF.Int64Ty,
8274                                            /*isSigned=*/false);
8275       }
8276       CurOffsets.push_back(Offset);
8277 
8278       // Count
8279       const Expr *CountExpr = OASE->getLength();
8280       llvm::Value *Count = nullptr;
8281       if (!CountExpr) {
8282         // In Clang, once a high dimension is an array section, we construct all
8283         // the lower dimension as array section, however, for case like
8284         // arr[0:2][2], Clang construct the inner dimension as an array section
8285         // but it actually is not in an array section form according to spec.
8286         if (!OASE->getColonLocFirst().isValid() &&
8287             !OASE->getColonLocSecond().isValid()) {
8288           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8289         } else {
8290           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8291           // When the length is absent it defaults to ⌈(size −
8292           // lower-bound)/stride⌉, where size is the size of the array
8293           // dimension.
8294           const Expr *StrideExpr = OASE->getStride();
8295           llvm::Value *Stride =
8296               StrideExpr
8297                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8298                                               CGF.Int64Ty, /*isSigned=*/false)
8299                   : nullptr;
8300           if (Stride)
8301             Count = CGF.Builder.CreateUDiv(
8302                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8303           else
8304             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8305         }
8306       } else {
8307         Count = CGF.EmitScalarExpr(CountExpr);
8308       }
8309       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8310       CurCounts.push_back(Count);
8311 
8312       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8313       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8314       //              Offset      Count     Stride
8315       //    D0          0           1         4    (int)    <- dummy dimension
8316       //    D1          0           2         8    (2 * (1) * 4)
8317       //    D2          1           2         20   (1 * (1 * 5) * 4)
8318       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8319       const Expr *StrideExpr = OASE->getStride();
8320       llvm::Value *Stride =
8321           StrideExpr
8322               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8323                                           CGF.Int64Ty, /*isSigned=*/false)
8324               : nullptr;
8325       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8326       if (Stride)
8327         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8328       else
8329         CurStrides.push_back(DimProd);
8330       if (DI != DimSizes.end())
8331         ++DI;
8332     }
8333 
8334     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8335     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8336     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8337   }
8338 
8339   /// Return the adjusted map modifiers if the declaration a capture refers to
8340   /// appears in a first-private clause. This is expected to be used only with
8341   /// directives that start with 'target'.
8342   MappableExprsHandler::OpenMPOffloadMappingFlags
8343   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8344     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8345 
8346     // A first private variable captured by reference will use only the
8347     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8348     // declaration is known as first-private in this handler.
8349     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8350       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8351         return MappableExprsHandler::OMP_MAP_TO |
8352                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8353       return MappableExprsHandler::OMP_MAP_PRIVATE |
8354              MappableExprsHandler::OMP_MAP_TO;
8355     }
8356     auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8357     if (I != LambdasMap.end())
8358       // for map(to: lambda): using user specified map type.
8359       return getMapTypeBits(
8360           I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8361           /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
8362           /*AddPtrFlag=*/false,
8363           /*AddIsTargetParamFlag=*/false,
8364           /*isNonContiguous=*/false);
8365     return MappableExprsHandler::OMP_MAP_TO |
8366            MappableExprsHandler::OMP_MAP_FROM;
8367   }
8368 
8369   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8370     // Rotate by getFlagMemberOffset() bits.
8371     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8372                                                   << getFlagMemberOffset());
8373   }
8374 
8375   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8376                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8377     // If the entry is PTR_AND_OBJ but has not been marked with the special
8378     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8379     // marked as MEMBER_OF.
8380     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8381         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8382       return;
8383 
8384     // Reset the placeholder value to prepare the flag for the assignment of the
8385     // proper MEMBER_OF value.
8386     Flags &= ~OMP_MAP_MEMBER_OF;
8387     Flags |= MemberOfFlag;
8388   }
8389 
8390   void getPlainLayout(const CXXRecordDecl *RD,
8391                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8392                       bool AsBase) const {
8393     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8394 
8395     llvm::StructType *St =
8396         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8397 
8398     unsigned NumElements = St->getNumElements();
8399     llvm::SmallVector<
8400         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8401         RecordLayout(NumElements);
8402 
8403     // Fill bases.
8404     for (const auto &I : RD->bases()) {
8405       if (I.isVirtual())
8406         continue;
8407       const auto *Base = I.getType()->getAsCXXRecordDecl();
8408       // Ignore empty bases.
8409       if (Base->isEmpty() || CGF.getContext()
8410                                  .getASTRecordLayout(Base)
8411                                  .getNonVirtualSize()
8412                                  .isZero())
8413         continue;
8414 
8415       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8416       RecordLayout[FieldIndex] = Base;
8417     }
8418     // Fill in virtual bases.
8419     for (const auto &I : RD->vbases()) {
8420       const auto *Base = I.getType()->getAsCXXRecordDecl();
8421       // Ignore empty bases.
8422       if (Base->isEmpty())
8423         continue;
8424       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8425       if (RecordLayout[FieldIndex])
8426         continue;
8427       RecordLayout[FieldIndex] = Base;
8428     }
8429     // Fill in all the fields.
8430     assert(!RD->isUnion() && "Unexpected union.");
8431     for (const auto *Field : RD->fields()) {
8432       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8433       // will fill in later.)
8434       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8435         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8436         RecordLayout[FieldIndex] = Field;
8437       }
8438     }
8439     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8440              &Data : RecordLayout) {
8441       if (Data.isNull())
8442         continue;
8443       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8444         getPlainLayout(Base, Layout, /*AsBase=*/true);
8445       else
8446         Layout.push_back(Data.get<const FieldDecl *>());
8447     }
8448   }
8449 
8450   /// Generate all the base pointers, section pointers, sizes, map types, and
8451   /// mappers for the extracted mappable expressions (all included in \a
8452   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8453   /// pair of the relevant declaration and index where it occurs is appended to
8454   /// the device pointers info array.
8455   void generateAllInfoForClauses(
8456       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8457       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8458           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8459     // We have to process the component lists that relate with the same
8460     // declaration in a single chunk so that we can generate the map flags
8461     // correctly. Therefore, we organize all lists in a map.
8462     enum MapKind { Present, Allocs, Other, Total };
8463     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8464                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8465         Info;
8466 
8467     // Helper function to fill the information map for the different supported
8468     // clauses.
8469     auto &&InfoGen =
8470         [&Info, &SkipVarSet](
8471             const ValueDecl *D, MapKind Kind,
8472             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8473             OpenMPMapClauseKind MapType,
8474             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8475             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8476             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8477             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8478           if (SkipVarSet.contains(D))
8479             return;
8480           auto It = Info.find(D);
8481           if (It == Info.end())
8482             It = Info
8483                      .insert(std::make_pair(
8484                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8485                      .first;
8486           It->second[Kind].emplace_back(
8487               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8488               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8489         };
8490 
8491     for (const auto *Cl : Clauses) {
8492       const auto *C = dyn_cast<OMPMapClause>(Cl);
8493       if (!C)
8494         continue;
8495       MapKind Kind = Other;
8496       if (llvm::is_contained(C->getMapTypeModifiers(),
8497                              OMPC_MAP_MODIFIER_present))
8498         Kind = Present;
8499       else if (C->getMapType() == OMPC_MAP_alloc)
8500         Kind = Allocs;
8501       const auto *EI = C->getVarRefs().begin();
8502       for (const auto L : C->component_lists()) {
8503         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8504         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8505                 C->getMapTypeModifiers(), llvm::None,
8506                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8507                 E);
8508         ++EI;
8509       }
8510     }
8511     for (const auto *Cl : Clauses) {
8512       const auto *C = dyn_cast<OMPToClause>(Cl);
8513       if (!C)
8514         continue;
8515       MapKind Kind = Other;
8516       if (llvm::is_contained(C->getMotionModifiers(),
8517                              OMPC_MOTION_MODIFIER_present))
8518         Kind = Present;
8519       const auto *EI = C->getVarRefs().begin();
8520       for (const auto L : C->component_lists()) {
8521         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8522                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8523                 C->isImplicit(), std::get<2>(L), *EI);
8524         ++EI;
8525       }
8526     }
8527     for (const auto *Cl : Clauses) {
8528       const auto *C = dyn_cast<OMPFromClause>(Cl);
8529       if (!C)
8530         continue;
8531       MapKind Kind = Other;
8532       if (llvm::is_contained(C->getMotionModifiers(),
8533                              OMPC_MOTION_MODIFIER_present))
8534         Kind = Present;
8535       const auto *EI = C->getVarRefs().begin();
8536       for (const auto L : C->component_lists()) {
8537         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8538                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8539                 C->isImplicit(), std::get<2>(L), *EI);
8540         ++EI;
8541       }
8542     }
8543 
8544     // Look at the use_device_ptr clause information and mark the existing map
8545     // entries as such. If there is no map information for an entry in the
8546     // use_device_ptr list, we create one with map type 'alloc' and zero size
8547     // section. It is the user fault if that was not mapped before. If there is
8548     // no map information and the pointer is a struct member, then we defer the
8549     // emission of that entry until the whole struct has been processed.
8550     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8551                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8552         DeferredInfo;
8553     MapCombinedInfoTy UseDevicePtrCombinedInfo;
8554 
8555     for (const auto *Cl : Clauses) {
8556       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8557       if (!C)
8558         continue;
8559       for (const auto L : C->component_lists()) {
8560         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8561             std::get<1>(L);
8562         assert(!Components.empty() &&
8563                "Not expecting empty list of components!");
8564         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8565         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8566         const Expr *IE = Components.back().getAssociatedExpression();
8567         // If the first component is a member expression, we have to look into
8568         // 'this', which maps to null in the map of map information. Otherwise
8569         // look directly for the information.
8570         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8571 
8572         // We potentially have map information for this declaration already.
8573         // Look for the first set of components that refer to it.
8574         if (It != Info.end()) {
8575           bool Found = false;
8576           for (auto &Data : It->second) {
8577             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8578               return MI.Components.back().getAssociatedDeclaration() == VD;
8579             });
8580             // If we found a map entry, signal that the pointer has to be
8581             // returned and move on to the next declaration. Exclude cases where
8582             // the base pointer is mapped as array subscript, array section or
8583             // array shaping. The base address is passed as a pointer to base in
8584             // this case and cannot be used as a base for use_device_ptr list
8585             // item.
8586             if (CI != Data.end()) {
8587               auto PrevCI = std::next(CI->Components.rbegin());
8588               const auto *VarD = dyn_cast<VarDecl>(VD);
8589               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8590                   isa<MemberExpr>(IE) ||
8591                   !VD->getType().getNonReferenceType()->isPointerType() ||
8592                   PrevCI == CI->Components.rend() ||
8593                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8594                   VarD->hasLocalStorage()) {
8595                 CI->ReturnDevicePointer = true;
8596                 Found = true;
8597                 break;
8598               }
8599             }
8600           }
8601           if (Found)
8602             continue;
8603         }
8604 
8605         // We didn't find any match in our map information - generate a zero
8606         // size array section - if the pointer is a struct member we defer this
8607         // action until the whole struct has been processed.
8608         if (isa<MemberExpr>(IE)) {
8609           // Insert the pointer into Info to be processed by
8610           // generateInfoForComponentList. Because it is a member pointer
8611           // without a pointee, no entry will be generated for it, therefore
8612           // we need to generate one after the whole struct has been processed.
8613           // Nonetheless, generateInfoForComponentList must be called to take
8614           // the pointer into account for the calculation of the range of the
8615           // partial struct.
8616           InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8617                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8618                   nullptr);
8619           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8620         } else {
8621           llvm::Value *Ptr =
8622               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8623           UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8624           UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8625           UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8626           UseDevicePtrCombinedInfo.Sizes.push_back(
8627               llvm::Constant::getNullValue(CGF.Int64Ty));
8628           UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8629           UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8630         }
8631       }
8632     }
8633 
8634     // Look at the use_device_addr clause information and mark the existing map
8635     // entries as such. If there is no map information for an entry in the
8636     // use_device_addr list, we create one with map type 'alloc' and zero size
8637     // section. It is the user fault if that was not mapped before. If there is
8638     // no map information and the pointer is a struct member, then we defer the
8639     // emission of that entry until the whole struct has been processed.
8640     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8641     for (const auto *Cl : Clauses) {
8642       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8643       if (!C)
8644         continue;
8645       for (const auto L : C->component_lists()) {
8646         assert(!std::get<1>(L).empty() &&
8647                "Not expecting empty list of components!");
8648         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8649         if (!Processed.insert(VD).second)
8650           continue;
8651         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8652         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8653         // If the first component is a member expression, we have to look into
8654         // 'this', which maps to null in the map of map information. Otherwise
8655         // look directly for the information.
8656         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8657 
8658         // We potentially have map information for this declaration already.
8659         // Look for the first set of components that refer to it.
8660         if (It != Info.end()) {
8661           bool Found = false;
8662           for (auto &Data : It->second) {
8663             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8664               return MI.Components.back().getAssociatedDeclaration() == VD;
8665             });
8666             // If we found a map entry, signal that the pointer has to be
8667             // returned and move on to the next declaration.
8668             if (CI != Data.end()) {
8669               CI->ReturnDevicePointer = true;
8670               Found = true;
8671               break;
8672             }
8673           }
8674           if (Found)
8675             continue;
8676         }
8677 
8678         // We didn't find any match in our map information - generate a zero
8679         // size array section - if the pointer is a struct member we defer this
8680         // action until the whole struct has been processed.
8681         if (isa<MemberExpr>(IE)) {
8682           // Insert the pointer into Info to be processed by
8683           // generateInfoForComponentList. Because it is a member pointer
8684           // without a pointee, no entry will be generated for it, therefore
8685           // we need to generate one after the whole struct has been processed.
8686           // Nonetheless, generateInfoForComponentList must be called to take
8687           // the pointer into account for the calculation of the range of the
8688           // partial struct.
8689           InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8690                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8691                   nullptr, nullptr, /*ForDeviceAddr=*/true);
8692           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8693         } else {
8694           llvm::Value *Ptr;
8695           if (IE->isGLValue())
8696             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8697           else
8698             Ptr = CGF.EmitScalarExpr(IE);
8699           CombinedInfo.Exprs.push_back(VD);
8700           CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8701           CombinedInfo.Pointers.push_back(Ptr);
8702           CombinedInfo.Sizes.push_back(
8703               llvm::Constant::getNullValue(CGF.Int64Ty));
8704           CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8705           CombinedInfo.Mappers.push_back(nullptr);
8706         }
8707       }
8708     }
8709 
8710     for (const auto &Data : Info) {
8711       StructRangeInfoTy PartialStruct;
8712       // Temporary generated information.
8713       MapCombinedInfoTy CurInfo;
8714       const Decl *D = Data.first;
8715       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8716       for (const auto &M : Data.second) {
8717         for (const MapInfo &L : M) {
8718           assert(!L.Components.empty() &&
8719                  "Not expecting declaration with no component lists.");
8720 
8721           // Remember the current base pointer index.
8722           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8723           CurInfo.NonContigInfo.IsNonContiguous =
8724               L.Components.back().isNonContiguous();
8725           generateInfoForComponentList(
8726               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8727               CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8728               L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8729 
8730           // If this entry relates with a device pointer, set the relevant
8731           // declaration and add the 'return pointer' flag.
8732           if (L.ReturnDevicePointer) {
8733             assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8734                    "Unexpected number of mapped base pointers.");
8735 
8736             const ValueDecl *RelevantVD =
8737                 L.Components.back().getAssociatedDeclaration();
8738             assert(RelevantVD &&
8739                    "No relevant declaration related with device pointer??");
8740 
8741             CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8742                 RelevantVD);
8743             CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8744           }
8745         }
8746       }
8747 
8748       // Append any pending zero-length pointers which are struct members and
8749       // used with use_device_ptr or use_device_addr.
8750       auto CI = DeferredInfo.find(Data.first);
8751       if (CI != DeferredInfo.end()) {
8752         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8753           llvm::Value *BasePtr;
8754           llvm::Value *Ptr;
8755           if (L.ForDeviceAddr) {
8756             if (L.IE->isGLValue())
8757               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8758             else
8759               Ptr = this->CGF.EmitScalarExpr(L.IE);
8760             BasePtr = Ptr;
8761             // Entry is RETURN_PARAM. Also, set the placeholder value
8762             // MEMBER_OF=FFFF so that the entry is later updated with the
8763             // correct value of MEMBER_OF.
8764             CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8765           } else {
8766             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8767             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8768                                              L.IE->getExprLoc());
8769             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8770             // placeholder value MEMBER_OF=FFFF so that the entry is later
8771             // updated with the correct value of MEMBER_OF.
8772             CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8773                                     OMP_MAP_MEMBER_OF);
8774           }
8775           CurInfo.Exprs.push_back(L.VD);
8776           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8777           CurInfo.Pointers.push_back(Ptr);
8778           CurInfo.Sizes.push_back(
8779               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8780           CurInfo.Mappers.push_back(nullptr);
8781         }
8782       }
8783       // If there is an entry in PartialStruct it means we have a struct with
8784       // individual members mapped. Emit an extra combined entry.
8785       if (PartialStruct.Base.isValid()) {
8786         CurInfo.NonContigInfo.Dims.push_back(0);
8787         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8788       }
8789 
8790       // We need to append the results of this capture to what we already
8791       // have.
8792       CombinedInfo.append(CurInfo);
8793     }
8794     // Append data for use_device_ptr clauses.
8795     CombinedInfo.append(UseDevicePtrCombinedInfo);
8796   }
8797 
8798 public:
8799   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8800       : CurDir(&Dir), CGF(CGF) {
8801     // Extract firstprivate clause information.
8802     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8803       for (const auto *D : C->varlists())
8804         FirstPrivateDecls.try_emplace(
8805             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8806     // Extract implicit firstprivates from uses_allocators clauses.
8807     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8808       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8809         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8810         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8811           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8812                                         /*Implicit=*/true);
8813         else if (const auto *VD = dyn_cast<VarDecl>(
8814                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8815                          ->getDecl()))
8816           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8817       }
8818     }
8819     // Extract device pointer clause information.
8820     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8821       for (auto L : C->component_lists())
8822         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8823     // Extract map information.
8824     for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8825       if (C->getMapType() != OMPC_MAP_to)
8826         continue;
8827       for (auto L : C->component_lists()) {
8828         const ValueDecl *VD = std::get<0>(L);
8829         const auto *RD = VD ? VD->getType()
8830                                   .getCanonicalType()
8831                                   .getNonReferenceType()
8832                                   ->getAsCXXRecordDecl()
8833                             : nullptr;
8834         if (RD && RD->isLambda())
8835           LambdasMap.try_emplace(std::get<0>(L), C);
8836       }
8837     }
8838   }
8839 
8840   /// Constructor for the declare mapper directive.
8841   MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8842       : CurDir(&Dir), CGF(CGF) {}
8843 
8844   /// Generate code for the combined entry if we have a partially mapped struct
8845   /// and take care of the mapping flags of the arguments corresponding to
8846   /// individual struct members.
8847   void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8848                          MapFlagsArrayTy &CurTypes,
8849                          const StructRangeInfoTy &PartialStruct,
8850                          const ValueDecl *VD = nullptr,
8851                          bool NotTargetParams = true) const {
8852     if (CurTypes.size() == 1 &&
8853         ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
8854         !PartialStruct.IsArraySection)
8855       return;
8856     Address LBAddr = PartialStruct.LowestElem.second;
8857     Address HBAddr = PartialStruct.HighestElem.second;
8858     if (PartialStruct.HasCompleteRecord) {
8859       LBAddr = PartialStruct.LB;
8860       HBAddr = PartialStruct.LB;
8861     }
8862     CombinedInfo.Exprs.push_back(VD);
8863     // Base is the base of the struct
8864     CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8865     // Pointer is the address of the lowest element
8866     llvm::Value *LB = LBAddr.getPointer();
8867     CombinedInfo.Pointers.push_back(LB);
8868     // There should not be a mapper for a combined entry.
8869     CombinedInfo.Mappers.push_back(nullptr);
8870     // Size is (addr of {highest+1} element) - (addr of lowest element)
8871     llvm::Value *HB = HBAddr.getPointer();
8872     llvm::Value *HAddr =
8873         CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
8874     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8875     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8876     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8877     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8878                                                   /*isSigned=*/false);
8879     CombinedInfo.Sizes.push_back(Size);
8880     // Map type is always TARGET_PARAM, if generate info for captures.
8881     CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
8882                                                  : OMP_MAP_TARGET_PARAM);
8883     // If any element has the present modifier, then make sure the runtime
8884     // doesn't attempt to allocate the struct.
8885     if (CurTypes.end() !=
8886         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8887           return Type & OMP_MAP_PRESENT;
8888         }))
8889       CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
8890     // Remove TARGET_PARAM flag from the first element
8891     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
8892     // If any element has the ompx_hold modifier, then make sure the runtime
8893     // uses the hold reference count for the struct as a whole so that it won't
8894     // be unmapped by an extra dynamic reference count decrement.  Add it to all
8895     // elements as well so the runtime knows which reference count to check
8896     // when determining whether it's time for device-to-host transfers of
8897     // individual elements.
8898     if (CurTypes.end() !=
8899         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8900           return Type & OMP_MAP_OMPX_HOLD;
8901         })) {
8902       CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
8903       for (auto &M : CurTypes)
8904         M |= OMP_MAP_OMPX_HOLD;
8905     }
8906 
8907     // All other current entries will be MEMBER_OF the combined entry
8908     // (except for PTR_AND_OBJ entries which do not have a placeholder value
8909     // 0xFFFF in the MEMBER_OF field).
8910     OpenMPOffloadMappingFlags MemberOfFlag =
8911         getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8912     for (auto &M : CurTypes)
8913       setCorrectMemberOfFlag(M, MemberOfFlag);
8914   }
8915 
8916   /// Generate all the base pointers, section pointers, sizes, map types, and
8917   /// mappers for the extracted mappable expressions (all included in \a
8918   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8919   /// pair of the relevant declaration and index where it occurs is appended to
8920   /// the device pointers info array.
8921   void generateAllInfo(
8922       MapCombinedInfoTy &CombinedInfo,
8923       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8924           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8925     assert(CurDir.is<const OMPExecutableDirective *>() &&
8926            "Expect a executable directive");
8927     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8928     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
8929   }
8930 
8931   /// Generate all the base pointers, section pointers, sizes, map types, and
8932   /// mappers for the extracted map clauses of user-defined mapper (all included
8933   /// in \a CombinedInfo).
8934   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
8935     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8936            "Expect a declare mapper directive");
8937     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8938     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
8939   }
8940 
8941   /// Emit capture info for lambdas for variables captured by reference.
8942   void generateInfoForLambdaCaptures(
8943       const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8944       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8945     QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
8946     const auto *RD = VDType->getAsCXXRecordDecl();
8947     if (!RD || !RD->isLambda())
8948       return;
8949     Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
8950                    CGF.getContext().getDeclAlign(VD));
8951     LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
8952     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8953     FieldDecl *ThisCapture = nullptr;
8954     RD->getCaptureFields(Captures, ThisCapture);
8955     if (ThisCapture) {
8956       LValue ThisLVal =
8957           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8958       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8959       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8960                                  VDLVal.getPointer(CGF));
8961       CombinedInfo.Exprs.push_back(VD);
8962       CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8963       CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8964       CombinedInfo.Sizes.push_back(
8965           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8966                                     CGF.Int64Ty, /*isSigned=*/true));
8967       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8968                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8969       CombinedInfo.Mappers.push_back(nullptr);
8970     }
8971     for (const LambdaCapture &LC : RD->captures()) {
8972       if (!LC.capturesVariable())
8973         continue;
8974       const VarDecl *VD = LC.getCapturedVar();
8975       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8976         continue;
8977       auto It = Captures.find(VD);
8978       assert(It != Captures.end() && "Found lambda capture without field.");
8979       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8980       if (LC.getCaptureKind() == LCK_ByRef) {
8981         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8982         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8983                                    VDLVal.getPointer(CGF));
8984         CombinedInfo.Exprs.push_back(VD);
8985         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8986         CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8987         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8988             CGF.getTypeSize(
8989                 VD->getType().getCanonicalType().getNonReferenceType()),
8990             CGF.Int64Ty, /*isSigned=*/true));
8991       } else {
8992         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8993         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8994                                    VDLVal.getPointer(CGF));
8995         CombinedInfo.Exprs.push_back(VD);
8996         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8997         CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8998         CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8999       }
9000       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9001                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9002       CombinedInfo.Mappers.push_back(nullptr);
9003     }
9004   }
9005 
9006   /// Set correct indices for lambdas captures.
9007   void adjustMemberOfForLambdaCaptures(
9008       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9009       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9010       MapFlagsArrayTy &Types) const {
9011     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9012       // Set correct member_of idx for all implicit lambda captures.
9013       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9014                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9015         continue;
9016       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9017       assert(BasePtr && "Unable to find base lambda address.");
9018       int TgtIdx = -1;
9019       for (unsigned J = I; J > 0; --J) {
9020         unsigned Idx = J - 1;
9021         if (Pointers[Idx] != BasePtr)
9022           continue;
9023         TgtIdx = Idx;
9024         break;
9025       }
9026       assert(TgtIdx != -1 && "Unable to find parent lambda.");
9027       // All other current entries will be MEMBER_OF the combined entry
9028       // (except for PTR_AND_OBJ entries which do not have a placeholder value
9029       // 0xFFFF in the MEMBER_OF field).
9030       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9031       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9032     }
9033   }
9034 
9035   /// Generate the base pointers, section pointers, sizes, map types, and
9036   /// mappers associated to a given capture (all included in \a CombinedInfo).
9037   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
9038                               llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9039                               StructRangeInfoTy &PartialStruct) const {
9040     assert(!Cap->capturesVariableArrayType() &&
9041            "Not expecting to generate map info for a variable array type!");
9042 
9043     // We need to know when we generating information for the first component
9044     const ValueDecl *VD = Cap->capturesThis()
9045                               ? nullptr
9046                               : Cap->getCapturedVar()->getCanonicalDecl();
9047 
9048     // for map(to: lambda): skip here, processing it in
9049     // generateDefaultMapInfo
9050     if (LambdasMap.count(VD))
9051       return;
9052 
9053     // If this declaration appears in a is_device_ptr clause we just have to
9054     // pass the pointer by value. If it is a reference to a declaration, we just
9055     // pass its value.
9056     if (DevPointersMap.count(VD)) {
9057       CombinedInfo.Exprs.push_back(VD);
9058       CombinedInfo.BasePointers.emplace_back(Arg, VD);
9059       CombinedInfo.Pointers.push_back(Arg);
9060       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9061           CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9062           /*isSigned=*/true));
9063       CombinedInfo.Types.push_back(
9064           (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
9065           OMP_MAP_TARGET_PARAM);
9066       CombinedInfo.Mappers.push_back(nullptr);
9067       return;
9068     }
9069 
9070     using MapData =
9071         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
9072                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
9073                    const ValueDecl *, const Expr *>;
9074     SmallVector<MapData, 4> DeclComponentLists;
9075     assert(CurDir.is<const OMPExecutableDirective *>() &&
9076            "Expect a executable directive");
9077     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9078     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9079       const auto *EI = C->getVarRefs().begin();
9080       for (const auto L : C->decl_component_lists(VD)) {
9081         const ValueDecl *VDecl, *Mapper;
9082         // The Expression is not correct if the mapping is implicit
9083         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9084         OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9085         std::tie(VDecl, Components, Mapper) = L;
9086         assert(VDecl == VD && "We got information for the wrong declaration??");
9087         assert(!Components.empty() &&
9088                "Not expecting declaration with no component lists.");
9089         DeclComponentLists.emplace_back(Components, C->getMapType(),
9090                                         C->getMapTypeModifiers(),
9091                                         C->isImplicit(), Mapper, E);
9092         ++EI;
9093       }
9094     }
9095     llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9096                                              const MapData &RHS) {
9097       ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9098       OpenMPMapClauseKind MapType = std::get<1>(RHS);
9099       bool HasPresent =
9100           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9101       bool HasAllocs = MapType == OMPC_MAP_alloc;
9102       MapModifiers = std::get<2>(RHS);
9103       MapType = std::get<1>(LHS);
9104       bool HasPresentR =
9105           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9106       bool HasAllocsR = MapType == OMPC_MAP_alloc;
9107       return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9108     });
9109 
9110     // Find overlapping elements (including the offset from the base element).
9111     llvm::SmallDenseMap<
9112         const MapData *,
9113         llvm::SmallVector<
9114             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9115         4>
9116         OverlappedData;
9117     size_t Count = 0;
9118     for (const MapData &L : DeclComponentLists) {
9119       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9120       OpenMPMapClauseKind MapType;
9121       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9122       bool IsImplicit;
9123       const ValueDecl *Mapper;
9124       const Expr *VarRef;
9125       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9126           L;
9127       ++Count;
9128       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
9129         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9130         std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9131                  VarRef) = L1;
9132         auto CI = Components.rbegin();
9133         auto CE = Components.rend();
9134         auto SI = Components1.rbegin();
9135         auto SE = Components1.rend();
9136         for (; CI != CE && SI != SE; ++CI, ++SI) {
9137           if (CI->getAssociatedExpression()->getStmtClass() !=
9138               SI->getAssociatedExpression()->getStmtClass())
9139             break;
9140           // Are we dealing with different variables/fields?
9141           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9142             break;
9143         }
9144         // Found overlapping if, at least for one component, reached the head
9145         // of the components list.
9146         if (CI == CE || SI == SE) {
9147           // Ignore it if it is the same component.
9148           if (CI == CE && SI == SE)
9149             continue;
9150           const auto It = (SI == SE) ? CI : SI;
9151           // If one component is a pointer and another one is a kind of
9152           // dereference of this pointer (array subscript, section, dereference,
9153           // etc.), it is not an overlapping.
9154           // Same, if one component is a base and another component is a
9155           // dereferenced pointer memberexpr with the same base.
9156           if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9157               (std::prev(It)->getAssociatedDeclaration() &&
9158                std::prev(It)
9159                    ->getAssociatedDeclaration()
9160                    ->getType()
9161                    ->isPointerType()) ||
9162               (It->getAssociatedDeclaration() &&
9163                It->getAssociatedDeclaration()->getType()->isPointerType() &&
9164                std::next(It) != CE && std::next(It) != SE))
9165             continue;
9166           const MapData &BaseData = CI == CE ? L : L1;
9167           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9168               SI == SE ? Components : Components1;
9169           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
9170           OverlappedElements.getSecond().push_back(SubData);
9171         }
9172       }
9173     }
9174     // Sort the overlapped elements for each item.
9175     llvm::SmallVector<const FieldDecl *, 4> Layout;
9176     if (!OverlappedData.empty()) {
9177       const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9178       const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9179       while (BaseType != OrigType) {
9180         BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9181         OrigType = BaseType->getPointeeOrArrayElementType();
9182       }
9183 
9184       if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9185         getPlainLayout(CRD, Layout, /*AsBase=*/false);
9186       else {
9187         const auto *RD = BaseType->getAsRecordDecl();
9188         Layout.append(RD->field_begin(), RD->field_end());
9189       }
9190     }
9191     for (auto &Pair : OverlappedData) {
9192       llvm::stable_sort(
9193           Pair.getSecond(),
9194           [&Layout](
9195               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9196               OMPClauseMappableExprCommon::MappableExprComponentListRef
9197                   Second) {
9198             auto CI = First.rbegin();
9199             auto CE = First.rend();
9200             auto SI = Second.rbegin();
9201             auto SE = Second.rend();
9202             for (; CI != CE && SI != SE; ++CI, ++SI) {
9203               if (CI->getAssociatedExpression()->getStmtClass() !=
9204                   SI->getAssociatedExpression()->getStmtClass())
9205                 break;
9206               // Are we dealing with different variables/fields?
9207               if (CI->getAssociatedDeclaration() !=
9208                   SI->getAssociatedDeclaration())
9209                 break;
9210             }
9211 
9212             // Lists contain the same elements.
9213             if (CI == CE && SI == SE)
9214               return false;
9215 
9216             // List with less elements is less than list with more elements.
9217             if (CI == CE || SI == SE)
9218               return CI == CE;
9219 
9220             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9221             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9222             if (FD1->getParent() == FD2->getParent())
9223               return FD1->getFieldIndex() < FD2->getFieldIndex();
9224             const auto *It =
9225                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9226                   return FD == FD1 || FD == FD2;
9227                 });
9228             return *It == FD1;
9229           });
9230     }
9231 
9232     // Associated with a capture, because the mapping flags depend on it.
9233     // Go through all of the elements with the overlapped elements.
9234     bool IsFirstComponentList = true;
9235     for (const auto &Pair : OverlappedData) {
9236       const MapData &L = *Pair.getFirst();
9237       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9238       OpenMPMapClauseKind MapType;
9239       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9240       bool IsImplicit;
9241       const ValueDecl *Mapper;
9242       const Expr *VarRef;
9243       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9244           L;
9245       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9246           OverlappedComponents = Pair.getSecond();
9247       generateInfoForComponentList(
9248           MapType, MapModifiers, llvm::None, Components, CombinedInfo,
9249           PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
9250           /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
9251       IsFirstComponentList = false;
9252     }
9253     // Go through other elements without overlapped elements.
9254     for (const MapData &L : DeclComponentLists) {
9255       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9256       OpenMPMapClauseKind MapType;
9257       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9258       bool IsImplicit;
9259       const ValueDecl *Mapper;
9260       const Expr *VarRef;
9261       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9262           L;
9263       auto It = OverlappedData.find(&L);
9264       if (It == OverlappedData.end())
9265         generateInfoForComponentList(MapType, MapModifiers, llvm::None,
9266                                      Components, CombinedInfo, PartialStruct,
9267                                      IsFirstComponentList, IsImplicit, Mapper,
9268                                      /*ForDeviceAddr=*/false, VD, VarRef);
9269       IsFirstComponentList = false;
9270     }
9271   }
9272 
9273   /// Generate the default map information for a given capture \a CI,
9274   /// record field declaration \a RI and captured value \a CV.
9275   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
9276                               const FieldDecl &RI, llvm::Value *CV,
9277                               MapCombinedInfoTy &CombinedInfo) const {
9278     bool IsImplicit = true;
9279     // Do the default mapping.
9280     if (CI.capturesThis()) {
9281       CombinedInfo.Exprs.push_back(nullptr);
9282       CombinedInfo.BasePointers.push_back(CV);
9283       CombinedInfo.Pointers.push_back(CV);
9284       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
9285       CombinedInfo.Sizes.push_back(
9286           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
9287                                     CGF.Int64Ty, /*isSigned=*/true));
9288       // Default map type.
9289       CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
9290     } else if (CI.capturesVariableByCopy()) {
9291       const VarDecl *VD = CI.getCapturedVar();
9292       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9293       CombinedInfo.BasePointers.push_back(CV);
9294       CombinedInfo.Pointers.push_back(CV);
9295       if (!RI.getType()->isAnyPointerType()) {
9296         // We have to signal to the runtime captures passed by value that are
9297         // not pointers.
9298         CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
9299         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9300             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
9301       } else {
9302         // Pointers are implicitly mapped with a zero size and no flags
9303         // (other than first map that is added for all implicit maps).
9304         CombinedInfo.Types.push_back(OMP_MAP_NONE);
9305         CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9306       }
9307       auto I = FirstPrivateDecls.find(VD);
9308       if (I != FirstPrivateDecls.end())
9309         IsImplicit = I->getSecond();
9310     } else {
9311       assert(CI.capturesVariable() && "Expected captured reference.");
9312       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9313       QualType ElementType = PtrTy->getPointeeType();
9314       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9315           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9316       // The default map type for a scalar/complex type is 'to' because by
9317       // default the value doesn't have to be retrieved. For an aggregate
9318       // type, the default is 'tofrom'.
9319       CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
9320       const VarDecl *VD = CI.getCapturedVar();
9321       auto I = FirstPrivateDecls.find(VD);
9322       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9323       CombinedInfo.BasePointers.push_back(CV);
9324       if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9325         Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9326             CV, ElementType, CGF.getContext().getDeclAlign(VD),
9327             AlignmentSource::Decl));
9328         CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
9329       } else {
9330         CombinedInfo.Pointers.push_back(CV);
9331       }
9332       if (I != FirstPrivateDecls.end())
9333         IsImplicit = I->getSecond();
9334     }
9335     // Every default map produces a single argument which is a target parameter.
9336     CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
9337 
9338     // Add flag stating this is an implicit map.
9339     if (IsImplicit)
9340       CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
9341 
9342     // No user-defined mapper for default mapping.
9343     CombinedInfo.Mappers.push_back(nullptr);
9344   }
9345 };
9346 } // anonymous namespace
9347 
9348 static void emitNonContiguousDescriptor(
9349     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9350     CGOpenMPRuntime::TargetDataInfo &Info) {
9351   CodeGenModule &CGM = CGF.CGM;
9352   MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
9353       &NonContigInfo = CombinedInfo.NonContigInfo;
9354 
9355   // Build an array of struct descriptor_dim and then assign it to
9356   // offload_args.
9357   //
9358   // struct descriptor_dim {
9359   //  uint64_t offset;
9360   //  uint64_t count;
9361   //  uint64_t stride
9362   // };
9363   ASTContext &C = CGF.getContext();
9364   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9365   RecordDecl *RD;
9366   RD = C.buildImplicitRecord("descriptor_dim");
9367   RD->startDefinition();
9368   addFieldToRecordDecl(C, RD, Int64Ty);
9369   addFieldToRecordDecl(C, RD, Int64Ty);
9370   addFieldToRecordDecl(C, RD, Int64Ty);
9371   RD->completeDefinition();
9372   QualType DimTy = C.getRecordType(RD);
9373 
9374   enum { OffsetFD = 0, CountFD, StrideFD };
9375   // We need two index variable here since the size of "Dims" is the same as the
9376   // size of Components, however, the size of offset, count, and stride is equal
9377   // to the size of base declaration that is non-contiguous.
9378   for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9379     // Skip emitting ir if dimension size is 1 since it cannot be
9380     // non-contiguous.
9381     if (NonContigInfo.Dims[I] == 1)
9382       continue;
9383     llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
9384     QualType ArrayTy =
9385         C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
9386     Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
9387     for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
9388       unsigned RevIdx = EE - II - 1;
9389       LValue DimsLVal = CGF.MakeAddrLValue(
9390           CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
9391       // Offset
9392       LValue OffsetLVal = CGF.EmitLValueForField(
9393           DimsLVal, *std::next(RD->field_begin(), OffsetFD));
9394       CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
9395       // Count
9396       LValue CountLVal = CGF.EmitLValueForField(
9397           DimsLVal, *std::next(RD->field_begin(), CountFD));
9398       CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
9399       // Stride
9400       LValue StrideLVal = CGF.EmitLValueForField(
9401           DimsLVal, *std::next(RD->field_begin(), StrideFD));
9402       CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
9403     }
9404     // args[I] = &dims
9405     Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9406         DimsAddr, CGM.Int8PtrTy, CGM.Int8Ty);
9407     llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9408         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9409         Info.PointersArray, 0, I);
9410     Address PAddr(P, CGM.VoidPtrTy, CGF.getPointerAlign());
9411     CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
9412     ++L;
9413   }
9414 }
9415 
9416 // Try to extract the base declaration from a `this->x` expression if possible.
9417 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9418   if (!E)
9419     return nullptr;
9420 
9421   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
9422     if (const MemberExpr *ME =
9423             dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9424       return ME->getMemberDecl();
9425   return nullptr;
9426 }
9427 
9428 /// Emit a string constant containing the names of the values mapped to the
9429 /// offloading runtime library.
9430 llvm::Constant *
9431 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9432                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9433 
9434   uint32_t SrcLocStrSize;
9435   if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9436     return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
9437 
9438   SourceLocation Loc;
9439   if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9440     if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9441       Loc = VD->getLocation();
9442     else
9443       Loc = MapExprs.getMapExpr()->getExprLoc();
9444   } else {
9445     Loc = MapExprs.getMapDecl()->getLocation();
9446   }
9447 
9448   std::string ExprName;
9449   if (MapExprs.getMapExpr()) {
9450     PrintingPolicy P(CGF.getContext().getLangOpts());
9451     llvm::raw_string_ostream OS(ExprName);
9452     MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9453     OS.flush();
9454   } else {
9455     ExprName = MapExprs.getMapDecl()->getNameAsString();
9456   }
9457 
9458   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9459   return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
9460                                          PLoc.getLine(), PLoc.getColumn(),
9461                                          SrcLocStrSize);
9462 }
9463 
9464 /// Emit the arrays used to pass the captures and map information to the
9465 /// offloading runtime library. If there is no map or capture information,
9466 /// return nullptr by reference.
9467 static void emitOffloadingArrays(
9468     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9469     CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
9470     bool IsNonContiguous = false) {
9471   CodeGenModule &CGM = CGF.CGM;
9472   ASTContext &Ctx = CGF.getContext();
9473 
9474   // Reset the array information.
9475   Info.clearArrayInfo();
9476   Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
9477 
9478   if (Info.NumberOfPtrs) {
9479     // Detect if we have any capture size requiring runtime evaluation of the
9480     // size so that a constant array could be eventually used.
9481 
9482     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
9483     QualType PointerArrayType = Ctx.getConstantArrayType(
9484         Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
9485         /*IndexTypeQuals=*/0);
9486 
9487     Info.BasePointersArray =
9488         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
9489     Info.PointersArray =
9490         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
9491     Address MappersArray =
9492         CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
9493     Info.MappersArray = MappersArray.getPointer();
9494 
9495     // If we don't have any VLA types or other types that require runtime
9496     // evaluation, we can use a constant array for the map sizes, otherwise we
9497     // need to fill up the arrays as we do for the pointers.
9498     QualType Int64Ty =
9499         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
9500     SmallVector<llvm::Constant *> ConstSizes(
9501         CombinedInfo.Sizes.size(), llvm::ConstantInt::get(CGF.Int64Ty, 0));
9502     llvm::SmallBitVector RuntimeSizes(CombinedInfo.Sizes.size());
9503     for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
9504       if (auto *CI = dyn_cast<llvm::Constant>(CombinedInfo.Sizes[I])) {
9505         if (!isa<llvm::ConstantExpr>(CI) && !isa<llvm::GlobalValue>(CI)) {
9506           if (IsNonContiguous && (CombinedInfo.Types[I] &
9507                                   MappableExprsHandler::OMP_MAP_NON_CONTIG))
9508             ConstSizes[I] = llvm::ConstantInt::get(
9509                 CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]);
9510           else
9511             ConstSizes[I] = CI;
9512           continue;
9513         }
9514       }
9515       RuntimeSizes.set(I);
9516     }
9517 
9518     if (RuntimeSizes.all()) {
9519       QualType SizeArrayType = Ctx.getConstantArrayType(
9520           Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
9521           /*IndexTypeQuals=*/0);
9522       Info.SizesArray =
9523           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
9524     } else {
9525       auto *SizesArrayInit = llvm::ConstantArray::get(
9526           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
9527       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
9528       auto *SizesArrayGbl = new llvm::GlobalVariable(
9529           CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true,
9530           llvm::GlobalValue::PrivateLinkage, SizesArrayInit, Name);
9531       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
9532       if (RuntimeSizes.any()) {
9533         QualType SizeArrayType = Ctx.getConstantArrayType(
9534             Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
9535             /*IndexTypeQuals=*/0);
9536         Address Buffer = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes");
9537         llvm::Value *GblConstPtr =
9538             CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9539                 SizesArrayGbl, CGM.Int64Ty->getPointerTo());
9540         CGF.Builder.CreateMemCpy(
9541             Buffer,
9542             Address(GblConstPtr, CGM.Int64Ty,
9543                     CGM.getNaturalTypeAlignment(Ctx.getIntTypeForBitwidth(
9544                         /*DestWidth=*/64, /*Signed=*/false))),
9545             CGF.getTypeSize(SizeArrayType));
9546         Info.SizesArray = Buffer.getPointer();
9547       } else {
9548         Info.SizesArray = SizesArrayGbl;
9549       }
9550     }
9551 
9552     // The map types are always constant so we don't need to generate code to
9553     // fill arrays. Instead, we create an array constant.
9554     SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
9555     llvm::copy(CombinedInfo.Types, Mapping.begin());
9556     std::string MaptypesName =
9557         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
9558     auto *MapTypesArrayGbl =
9559         OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
9560     Info.MapTypesArray = MapTypesArrayGbl;
9561 
9562     // The information types are only built if there is debug information
9563     // requested.
9564     if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
9565       Info.MapNamesArray = llvm::Constant::getNullValue(
9566           llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
9567     } else {
9568       auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9569         return emitMappingInformation(CGF, OMPBuilder, MapExpr);
9570       };
9571       SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
9572       llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
9573       std::string MapnamesName =
9574           CGM.getOpenMPRuntime().getName({"offload_mapnames"});
9575       auto *MapNamesArrayGbl =
9576           OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
9577       Info.MapNamesArray = MapNamesArrayGbl;
9578     }
9579 
9580     // If there's a present map type modifier, it must not be applied to the end
9581     // of a region, so generate a separate map type array in that case.
9582     if (Info.separateBeginEndCalls()) {
9583       bool EndMapTypesDiffer = false;
9584       for (uint64_t &Type : Mapping) {
9585         if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
9586           Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
9587           EndMapTypesDiffer = true;
9588         }
9589       }
9590       if (EndMapTypesDiffer) {
9591         MapTypesArrayGbl =
9592             OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
9593         Info.MapTypesArrayEnd = MapTypesArrayGbl;
9594       }
9595     }
9596 
9597     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
9598       llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
9599       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
9600           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9601           Info.BasePointersArray, 0, I);
9602       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9603           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
9604       Address BPAddr(BP, BPVal->getType(),
9605                      Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9606       CGF.Builder.CreateStore(BPVal, BPAddr);
9607 
9608       if (Info.requiresDevicePointerInfo())
9609         if (const ValueDecl *DevVD =
9610                 CombinedInfo.BasePointers[I].getDevicePtrDecl())
9611           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
9612 
9613       llvm::Value *PVal = CombinedInfo.Pointers[I];
9614       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9615           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9616           Info.PointersArray, 0, I);
9617       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9618           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
9619       Address PAddr(P, PVal->getType(), Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9620       CGF.Builder.CreateStore(PVal, PAddr);
9621 
9622       if (RuntimeSizes.test(I)) {
9623         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
9624             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9625             Info.SizesArray,
9626             /*Idx0=*/0,
9627             /*Idx1=*/I);
9628         Address SAddr(S, CGM.Int64Ty, Ctx.getTypeAlignInChars(Int64Ty));
9629         CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
9630                                                           CGM.Int64Ty,
9631                                                           /*isSigned=*/true),
9632                                 SAddr);
9633       }
9634 
9635       // Fill up the mapper array.
9636       llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
9637       if (CombinedInfo.Mappers[I]) {
9638         MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
9639             cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9640         MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
9641         Info.HasMapper = true;
9642       }
9643       Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
9644       CGF.Builder.CreateStore(MFunc, MAddr);
9645     }
9646   }
9647 
9648   if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
9649       Info.NumberOfPtrs == 0)
9650     return;
9651 
9652   emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
9653 }
9654 
9655 namespace {
9656 /// Additional arguments for emitOffloadingArraysArgument function.
9657 struct ArgumentsOptions {
9658   bool ForEndCall = false;
9659   ArgumentsOptions() = default;
9660   ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
9661 };
9662 } // namespace
9663 
9664 /// Emit the arguments to be passed to the runtime library based on the
9665 /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
9666 /// ForEndCall, emit map types to be passed for the end of the region instead of
9667 /// the beginning.
9668 static void emitOffloadingArraysArgument(
9669     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
9670     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
9671     llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
9672     llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
9673     const ArgumentsOptions &Options = ArgumentsOptions()) {
9674   assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
9675          "expected region end call to runtime only when end call is separate");
9676   CodeGenModule &CGM = CGF.CGM;
9677   if (Info.NumberOfPtrs) {
9678     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9679         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9680         Info.BasePointersArray,
9681         /*Idx0=*/0, /*Idx1=*/0);
9682     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9683         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9684         Info.PointersArray,
9685         /*Idx0=*/0,
9686         /*Idx1=*/0);
9687     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9688         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9689         /*Idx0=*/0, /*Idx1=*/0);
9690     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9691         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9692         Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
9693                                                     : Info.MapTypesArray,
9694         /*Idx0=*/0,
9695         /*Idx1=*/0);
9696 
9697     // Only emit the mapper information arrays if debug information is
9698     // requested.
9699     if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9700       MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9701     else
9702       MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9703           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9704           Info.MapNamesArray,
9705           /*Idx0=*/0,
9706           /*Idx1=*/0);
9707     // If there is no user-defined mapper, set the mapper array to nullptr to
9708     // avoid an unnecessary data privatization
9709     if (!Info.HasMapper)
9710       MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9711     else
9712       MappersArrayArg =
9713           CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
9714   } else {
9715     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9716     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9717     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9718     MapTypesArrayArg =
9719         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9720     MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9721     MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9722   }
9723 }
9724 
9725 /// Check for inner distribute directive.
9726 static const OMPExecutableDirective *
9727 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9728   const auto *CS = D.getInnermostCapturedStmt();
9729   const auto *Body =
9730       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9731   const Stmt *ChildStmt =
9732       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9733 
9734   if (const auto *NestedDir =
9735           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9736     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9737     switch (D.getDirectiveKind()) {
9738     case OMPD_target:
9739       if (isOpenMPDistributeDirective(DKind))
9740         return NestedDir;
9741       if (DKind == OMPD_teams) {
9742         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9743             /*IgnoreCaptured=*/true);
9744         if (!Body)
9745           return nullptr;
9746         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9747         if (const auto *NND =
9748                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9749           DKind = NND->getDirectiveKind();
9750           if (isOpenMPDistributeDirective(DKind))
9751             return NND;
9752         }
9753       }
9754       return nullptr;
9755     case OMPD_target_teams:
9756       if (isOpenMPDistributeDirective(DKind))
9757         return NestedDir;
9758       return nullptr;
9759     case OMPD_target_parallel:
9760     case OMPD_target_simd:
9761     case OMPD_target_parallel_for:
9762     case OMPD_target_parallel_for_simd:
9763       return nullptr;
9764     case OMPD_target_teams_distribute:
9765     case OMPD_target_teams_distribute_simd:
9766     case OMPD_target_teams_distribute_parallel_for:
9767     case OMPD_target_teams_distribute_parallel_for_simd:
9768     case OMPD_parallel:
9769     case OMPD_for:
9770     case OMPD_parallel_for:
9771     case OMPD_parallel_master:
9772     case OMPD_parallel_sections:
9773     case OMPD_for_simd:
9774     case OMPD_parallel_for_simd:
9775     case OMPD_cancel:
9776     case OMPD_cancellation_point:
9777     case OMPD_ordered:
9778     case OMPD_threadprivate:
9779     case OMPD_allocate:
9780     case OMPD_task:
9781     case OMPD_simd:
9782     case OMPD_tile:
9783     case OMPD_unroll:
9784     case OMPD_sections:
9785     case OMPD_section:
9786     case OMPD_single:
9787     case OMPD_master:
9788     case OMPD_critical:
9789     case OMPD_taskyield:
9790     case OMPD_barrier:
9791     case OMPD_taskwait:
9792     case OMPD_taskgroup:
9793     case OMPD_atomic:
9794     case OMPD_flush:
9795     case OMPD_depobj:
9796     case OMPD_scan:
9797     case OMPD_teams:
9798     case OMPD_target_data:
9799     case OMPD_target_exit_data:
9800     case OMPD_target_enter_data:
9801     case OMPD_distribute:
9802     case OMPD_distribute_simd:
9803     case OMPD_distribute_parallel_for:
9804     case OMPD_distribute_parallel_for_simd:
9805     case OMPD_teams_distribute:
9806     case OMPD_teams_distribute_simd:
9807     case OMPD_teams_distribute_parallel_for:
9808     case OMPD_teams_distribute_parallel_for_simd:
9809     case OMPD_target_update:
9810     case OMPD_declare_simd:
9811     case OMPD_declare_variant:
9812     case OMPD_begin_declare_variant:
9813     case OMPD_end_declare_variant:
9814     case OMPD_declare_target:
9815     case OMPD_end_declare_target:
9816     case OMPD_declare_reduction:
9817     case OMPD_declare_mapper:
9818     case OMPD_taskloop:
9819     case OMPD_taskloop_simd:
9820     case OMPD_master_taskloop:
9821     case OMPD_master_taskloop_simd:
9822     case OMPD_parallel_master_taskloop:
9823     case OMPD_parallel_master_taskloop_simd:
9824     case OMPD_requires:
9825     case OMPD_metadirective:
9826     case OMPD_unknown:
9827     default:
9828       llvm_unreachable("Unexpected directive.");
9829     }
9830   }
9831 
9832   return nullptr;
9833 }
9834 
9835 /// Emit the user-defined mapper function. The code generation follows the
9836 /// pattern in the example below.
9837 /// \code
9838 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9839 ///                                           void *base, void *begin,
9840 ///                                           int64_t size, int64_t type,
9841 ///                                           void *name = nullptr) {
9842 ///   // Allocate space for an array section first or add a base/begin for
9843 ///   // pointer dereference.
9844 ///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9845 ///       !maptype.IsDelete)
9846 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9847 ///                                 size*sizeof(Ty), clearToFromMember(type));
9848 ///   // Map members.
9849 ///   for (unsigned i = 0; i < size; i++) {
9850 ///     // For each component specified by this mapper:
9851 ///     for (auto c : begin[i]->all_components) {
9852 ///       if (c.hasMapper())
9853 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9854 ///                       c.arg_type, c.arg_name);
9855 ///       else
9856 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9857 ///                                     c.arg_begin, c.arg_size, c.arg_type,
9858 ///                                     c.arg_name);
9859 ///     }
9860 ///   }
9861 ///   // Delete the array section.
9862 ///   if (size > 1 && maptype.IsDelete)
9863 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9864 ///                                 size*sizeof(Ty), clearToFromMember(type));
9865 /// }
9866 /// \endcode
9867 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9868                                             CodeGenFunction *CGF) {
9869   if (UDMMap.count(D) > 0)
9870     return;
9871   ASTContext &C = CGM.getContext();
9872   QualType Ty = D->getType();
9873   QualType PtrTy = C.getPointerType(Ty).withRestrict();
9874   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9875   auto *MapperVarDecl =
9876       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9877   SourceLocation Loc = D->getLocation();
9878   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9879   llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
9880 
9881   // Prepare mapper function arguments and attributes.
9882   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9883                               C.VoidPtrTy, ImplicitParamDecl::Other);
9884   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9885                             ImplicitParamDecl::Other);
9886   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9887                              C.VoidPtrTy, ImplicitParamDecl::Other);
9888   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9889                             ImplicitParamDecl::Other);
9890   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9891                             ImplicitParamDecl::Other);
9892   ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9893                             ImplicitParamDecl::Other);
9894   FunctionArgList Args;
9895   Args.push_back(&HandleArg);
9896   Args.push_back(&BaseArg);
9897   Args.push_back(&BeginArg);
9898   Args.push_back(&SizeArg);
9899   Args.push_back(&TypeArg);
9900   Args.push_back(&NameArg);
9901   const CGFunctionInfo &FnInfo =
9902       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9903   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9904   SmallString<64> TyStr;
9905   llvm::raw_svector_ostream Out(TyStr);
9906   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
9907   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9908   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9909                                     Name, &CGM.getModule());
9910   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9911   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9912   // Start the mapper function code generation.
9913   CodeGenFunction MapperCGF(CGM);
9914   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9915   // Compute the starting and end addresses of array elements.
9916   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9917       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9918       C.getPointerType(Int64Ty), Loc);
9919   // Prepare common arguments for array initiation and deletion.
9920   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9921       MapperCGF.GetAddrOfLocalVar(&HandleArg),
9922       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9923   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9924       MapperCGF.GetAddrOfLocalVar(&BaseArg),
9925       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9926   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9927       MapperCGF.GetAddrOfLocalVar(&BeginArg),
9928       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9929   // Convert the size in bytes into the number of array elements.
9930   Size = MapperCGF.Builder.CreateExactUDiv(
9931       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9932   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9933       BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
9934   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
9935   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9936       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9937       C.getPointerType(Int64Ty), Loc);
9938   llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
9939       MapperCGF.GetAddrOfLocalVar(&NameArg),
9940       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9941 
9942   // Emit array initiation if this is an array section and \p MapType indicates
9943   // that memory allocation is required.
9944   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9945   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9946                              MapName, ElementSize, HeadBB, /*IsInit=*/true);
9947 
9948   // Emit a for loop to iterate through SizeArg of elements and map all of them.
9949 
9950   // Emit the loop header block.
9951   MapperCGF.EmitBlock(HeadBB);
9952   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9953   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9954   // Evaluate whether the initial condition is satisfied.
9955   llvm::Value *IsEmpty =
9956       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9957   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9958   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9959 
9960   // Emit the loop body block.
9961   MapperCGF.EmitBlock(BodyBB);
9962   llvm::BasicBlock *LastBB = BodyBB;
9963   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9964       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9965   PtrPHI->addIncoming(PtrBegin, EntryBB);
9966   Address PtrCurrent(PtrPHI, ElemTy,
9967                      MapperCGF.GetAddrOfLocalVar(&BeginArg)
9968                          .getAlignment()
9969                          .alignmentOfArrayElement(ElementSize));
9970   // Privatize the declared variable of mapper to be the current array element.
9971   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9972   Scope.addPrivate(MapperVarDecl, PtrCurrent);
9973   (void)Scope.Privatize();
9974 
9975   // Get map clause information. Fill up the arrays with all mapped variables.
9976   MappableExprsHandler::MapCombinedInfoTy Info;
9977   MappableExprsHandler MEHandler(*D, MapperCGF);
9978   MEHandler.generateAllInfoForMapper(Info);
9979 
9980   // Call the runtime API __tgt_mapper_num_components to get the number of
9981   // pre-existing components.
9982   llvm::Value *OffloadingArgs[] = {Handle};
9983   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9984       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9985                                             OMPRTL___tgt_mapper_num_components),
9986       OffloadingArgs);
9987   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9988       PreviousSize,
9989       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9990 
9991   // Fill up the runtime mapper handle for all components.
9992   for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
9993     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9994         *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9995     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9996         Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9997     llvm::Value *CurSizeArg = Info.Sizes[I];
9998     llvm::Value *CurNameArg =
9999         (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
10000             ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
10001             : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
10002 
10003     // Extract the MEMBER_OF field from the map type.
10004     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
10005     llvm::Value *MemberMapType =
10006         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
10007 
10008     // Combine the map type inherited from user-defined mapper with that
10009     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
10010     // bits of the \a MapType, which is the input argument of the mapper
10011     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
10012     // bits of MemberMapType.
10013     // [OpenMP 5.0], 1.2.6. map-type decay.
10014     //        | alloc |  to   | from  | tofrom | release | delete
10015     // ----------------------------------------------------------
10016     // alloc  | alloc | alloc | alloc | alloc  | release | delete
10017     // to     | alloc |  to   | alloc |   to   | release | delete
10018     // from   | alloc | alloc | from  |  from  | release | delete
10019     // tofrom | alloc |  to   | from  | tofrom | release | delete
10020     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
10021         MapType,
10022         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
10023                                    MappableExprsHandler::OMP_MAP_FROM));
10024     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
10025     llvm::BasicBlock *AllocElseBB =
10026         MapperCGF.createBasicBlock("omp.type.alloc.else");
10027     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
10028     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
10029     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
10030     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
10031     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
10032     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
10033     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
10034     MapperCGF.EmitBlock(AllocBB);
10035     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
10036         MemberMapType,
10037         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
10038                                      MappableExprsHandler::OMP_MAP_FROM)));
10039     MapperCGF.Builder.CreateBr(EndBB);
10040     MapperCGF.EmitBlock(AllocElseBB);
10041     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
10042         LeftToFrom,
10043         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
10044     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
10045     // In case of to, clear OMP_MAP_FROM.
10046     MapperCGF.EmitBlock(ToBB);
10047     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
10048         MemberMapType,
10049         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
10050     MapperCGF.Builder.CreateBr(EndBB);
10051     MapperCGF.EmitBlock(ToElseBB);
10052     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
10053         LeftToFrom,
10054         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
10055     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
10056     // In case of from, clear OMP_MAP_TO.
10057     MapperCGF.EmitBlock(FromBB);
10058     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
10059         MemberMapType,
10060         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
10061     // In case of tofrom, do nothing.
10062     MapperCGF.EmitBlock(EndBB);
10063     LastBB = EndBB;
10064     llvm::PHINode *CurMapType =
10065         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
10066     CurMapType->addIncoming(AllocMapType, AllocBB);
10067     CurMapType->addIncoming(ToMapType, ToBB);
10068     CurMapType->addIncoming(FromMapType, FromBB);
10069     CurMapType->addIncoming(MemberMapType, ToElseBB);
10070 
10071     llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
10072                                      CurSizeArg, CurMapType, CurNameArg};
10073     if (Info.Mappers[I]) {
10074       // Call the corresponding mapper function.
10075       llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
10076           cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
10077       assert(MapperFunc && "Expect a valid mapper function is available.");
10078       MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
10079     } else {
10080       // Call the runtime API __tgt_push_mapper_component to fill up the runtime
10081       // data structure.
10082       MapperCGF.EmitRuntimeCall(
10083           OMPBuilder.getOrCreateRuntimeFunction(
10084               CGM.getModule(), OMPRTL___tgt_push_mapper_component),
10085           OffloadingArgs);
10086     }
10087   }
10088 
10089   // Update the pointer to point to the next element that needs to be mapped,
10090   // and check whether we have mapped all elements.
10091   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
10092       ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
10093   PtrPHI->addIncoming(PtrNext, LastBB);
10094   llvm::Value *IsDone =
10095       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
10096   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
10097   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
10098 
10099   MapperCGF.EmitBlock(ExitBB);
10100   // Emit array deletion if this is an array section and \p MapType indicates
10101   // that deletion is required.
10102   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
10103                              MapName, ElementSize, DoneBB, /*IsInit=*/false);
10104 
10105   // Emit the function exit block.
10106   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
10107   MapperCGF.FinishFunction();
10108   UDMMap.try_emplace(D, Fn);
10109   if (CGF) {
10110     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
10111     Decls.second.push_back(D);
10112   }
10113 }
10114 
10115 /// Emit the array initialization or deletion portion for user-defined mapper
10116 /// code generation. First, it evaluates whether an array section is mapped and
10117 /// whether the \a MapType instructs to delete this section. If \a IsInit is
10118 /// true, and \a MapType indicates to not delete this array, array
10119 /// initialization code is generated. If \a IsInit is false, and \a MapType
10120 /// indicates to not this array, array deletion code is generated.
10121 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
10122     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
10123     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
10124     llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
10125     bool IsInit) {
10126   StringRef Prefix = IsInit ? ".init" : ".del";
10127 
10128   // Evaluate if this is an array section.
10129   llvm::BasicBlock *BodyBB =
10130       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
10131   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
10132       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
10133   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
10134       MapType,
10135       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
10136   llvm::Value *DeleteCond;
10137   llvm::Value *Cond;
10138   if (IsInit) {
10139     // base != begin?
10140     llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
10141     // IsPtrAndObj?
10142     llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
10143         MapType,
10144         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
10145     PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
10146     BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
10147     Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
10148     DeleteCond = MapperCGF.Builder.CreateIsNull(
10149         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
10150   } else {
10151     Cond = IsArray;
10152     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
10153         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
10154   }
10155   Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
10156   MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
10157 
10158   MapperCGF.EmitBlock(BodyBB);
10159   // Get the array size by multiplying element size and element number (i.e., \p
10160   // Size).
10161   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
10162       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
10163   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
10164   // memory allocation/deletion purpose only.
10165   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
10166       MapType,
10167       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
10168                                    MappableExprsHandler::OMP_MAP_FROM)));
10169   MapTypeArg = MapperCGF.Builder.CreateOr(
10170       MapTypeArg,
10171       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));
10172 
10173   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
10174   // data structure.
10175   llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
10176                                    ArraySize, MapTypeArg, MapName};
10177   MapperCGF.EmitRuntimeCall(
10178       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
10179                                             OMPRTL___tgt_push_mapper_component),
10180       OffloadingArgs);
10181 }
10182 
10183 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10184     const OMPDeclareMapperDecl *D) {
10185   auto I = UDMMap.find(D);
10186   if (I != UDMMap.end())
10187     return I->second;
10188   emitUserDefinedMapper(D);
10189   return UDMMap.lookup(D);
10190 }
10191 
10192 void CGOpenMPRuntime::emitTargetNumIterationsCall(
10193     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10194     llvm::Value *DeviceID,
10195     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10196                                      const OMPLoopDirective &D)>
10197         SizeEmitter) {
10198   OpenMPDirectiveKind Kind = D.getDirectiveKind();
10199   const OMPExecutableDirective *TD = &D;
10200   // Get nested teams distribute kind directive, if any.
10201   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
10202     TD = getNestedDistributeDirective(CGM.getContext(), D);
10203   if (!TD)
10204     return;
10205   const auto *LD = cast<OMPLoopDirective>(TD);
10206   auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
10207                                                          PrePostActionTy &) {
10208     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
10209       llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10210       llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
10211       CGF.EmitRuntimeCall(
10212           OMPBuilder.getOrCreateRuntimeFunction(
10213               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
10214           Args);
10215     }
10216   };
10217   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
10218 }
10219 
10220 void CGOpenMPRuntime::emitTargetCall(
10221     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10222     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
10223     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10224     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10225                                      const OMPLoopDirective &D)>
10226         SizeEmitter) {
10227   if (!CGF.HaveInsertPoint())
10228     return;
10229 
10230   const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsDevice &&
10231                                    CGM.getLangOpts().OpenMPOffloadMandatory;
10232 
10233   assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
10234 
10235   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10236                                  D.hasClausesOfKind<OMPNowaitClause>();
10237   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
10238   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
10239   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
10240                                             PrePostActionTy &) {
10241     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10242   };
10243   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
10244 
10245   CodeGenFunction::OMPTargetDataInfo InputInfo;
10246   llvm::Value *MapTypesArray = nullptr;
10247   llvm::Value *MapNamesArray = nullptr;
10248   // Generate code for the host fallback function.
10249   auto &&FallbackGen = [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask,
10250                         &CS, OffloadingMandatory](CodeGenFunction &CGF) {
10251     if (OffloadingMandatory) {
10252       CGF.Builder.CreateUnreachable();
10253     } else {
10254       if (RequiresOuterTask) {
10255         CapturedVars.clear();
10256         CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10257       }
10258       emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10259     }
10260   };
10261   // Fill up the pointer arrays and transfer execution to the device.
10262   auto &&ThenGen = [this, Device, OutlinedFnID, &D, &InputInfo, &MapTypesArray,
10263                     &MapNamesArray, SizeEmitter,
10264                     FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
10265     if (Device.getInt() == OMPC_DEVICE_ancestor) {
10266       // Reverse offloading is not supported, so just execute on the host.
10267       FallbackGen(CGF);
10268       return;
10269     }
10270 
10271     // On top of the arrays that were filled up, the target offloading call
10272     // takes as arguments the device id as well as the host pointer. The host
10273     // pointer is used by the runtime library to identify the current target
10274     // region, so it only has to be unique and not necessarily point to
10275     // anything. It could be the pointer to the outlined function that
10276     // implements the target region, but we aren't using that so that the
10277     // compiler doesn't need to keep that, and could therefore inline the host
10278     // function if proven worthwhile during optimization.
10279 
10280     // From this point on, we need to have an ID of the target region defined.
10281     assert(OutlinedFnID && "Invalid outlined function ID!");
10282     (void)OutlinedFnID;
10283 
10284     // Emit device ID if any.
10285     llvm::Value *DeviceID;
10286     if (Device.getPointer()) {
10287       assert((Device.getInt() == OMPC_DEVICE_unknown ||
10288               Device.getInt() == OMPC_DEVICE_device_num) &&
10289              "Expected device_num modifier.");
10290       llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10291       DeviceID =
10292           CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10293     } else {
10294       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10295     }
10296 
10297     // Emit the number of elements in the offloading arrays.
10298     llvm::Value *PointerNum =
10299         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10300 
10301     // Return value of the runtime offloading call.
10302     llvm::Value *Return;
10303 
10304     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
10305     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
10306 
10307     // Source location for the ident struct
10308     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10309 
10310     // Emit tripcount for the target loop-based directive.
10311     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
10312 
10313     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10314     // The target region is an outlined function launched by the runtime
10315     // via calls __tgt_target() or __tgt_target_teams().
10316     //
10317     // __tgt_target() launches a target region with one team and one thread,
10318     // executing a serial region.  This master thread may in turn launch
10319     // more threads within its team upon encountering a parallel region,
10320     // however, no additional teams can be launched on the device.
10321     //
10322     // __tgt_target_teams() launches a target region with one or more teams,
10323     // each with one or more threads.  This call is required for target
10324     // constructs such as:
10325     //  'target teams'
10326     //  'target' / 'teams'
10327     //  'target teams distribute parallel for'
10328     //  'target parallel'
10329     // and so on.
10330     //
10331     // Note that on the host and CPU targets, the runtime implementation of
10332     // these calls simply call the outlined function without forking threads.
10333     // The outlined functions themselves have runtime calls to
10334     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
10335     // the compiler in emitTeamsCall() and emitParallelCall().
10336     //
10337     // In contrast, on the NVPTX target, the implementation of
10338     // __tgt_target_teams() launches a GPU kernel with the requested number
10339     // of teams and threads so no additional calls to the runtime are required.
10340     if (NumTeams) {
10341       // If we have NumTeams defined this means that we have an enclosed teams
10342       // region. Therefore we also expect to have NumThreads defined. These two
10343       // values should be defined in the presence of a teams directive,
10344       // regardless of having any clauses associated. If the user is using teams
10345       // but no clauses, these two values will be the default that should be
10346       // passed to the runtime library - a 32-bit integer with the value zero.
10347       assert(NumThreads && "Thread limit expression should be available along "
10348                            "with number of teams.");
10349       SmallVector<llvm::Value *> OffloadingArgs = {
10350           RTLoc,
10351           DeviceID,
10352           OutlinedFnID,
10353           PointerNum,
10354           InputInfo.BasePointersArray.getPointer(),
10355           InputInfo.PointersArray.getPointer(),
10356           InputInfo.SizesArray.getPointer(),
10357           MapTypesArray,
10358           MapNamesArray,
10359           InputInfo.MappersArray.getPointer(),
10360           NumTeams,
10361           NumThreads};
10362       if (HasNowait) {
10363         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10364         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10365         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10366         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10367         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10368         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10369       }
10370       Return = CGF.EmitRuntimeCall(
10371           OMPBuilder.getOrCreateRuntimeFunction(
10372               CGM.getModule(), HasNowait
10373                                    ? OMPRTL___tgt_target_teams_nowait_mapper
10374                                    : OMPRTL___tgt_target_teams_mapper),
10375           OffloadingArgs);
10376     } else {
10377       SmallVector<llvm::Value *> OffloadingArgs = {
10378           RTLoc,
10379           DeviceID,
10380           OutlinedFnID,
10381           PointerNum,
10382           InputInfo.BasePointersArray.getPointer(),
10383           InputInfo.PointersArray.getPointer(),
10384           InputInfo.SizesArray.getPointer(),
10385           MapTypesArray,
10386           MapNamesArray,
10387           InputInfo.MappersArray.getPointer()};
10388       if (HasNowait) {
10389         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10390         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10391         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10392         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10393         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10394         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10395       }
10396       Return = CGF.EmitRuntimeCall(
10397           OMPBuilder.getOrCreateRuntimeFunction(
10398               CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
10399                                          : OMPRTL___tgt_target_mapper),
10400           OffloadingArgs);
10401     }
10402 
10403     // Check the error code and execute the host version if required.
10404     llvm::BasicBlock *OffloadFailedBlock =
10405         CGF.createBasicBlock("omp_offload.failed");
10406     llvm::BasicBlock *OffloadContBlock =
10407         CGF.createBasicBlock("omp_offload.cont");
10408     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
10409     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
10410 
10411     CGF.EmitBlock(OffloadFailedBlock);
10412     FallbackGen(CGF);
10413 
10414     CGF.EmitBranch(OffloadContBlock);
10415 
10416     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
10417   };
10418 
10419   // Notify that the host version must be executed.
10420   auto &&ElseGen = [FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
10421     FallbackGen(CGF);
10422   };
10423 
10424   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10425                           &MapNamesArray, &CapturedVars, RequiresOuterTask,
10426                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
10427     // Fill up the arrays with all the captured variables.
10428     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10429 
10430     // Get mappable expression information.
10431     MappableExprsHandler MEHandler(D, CGF);
10432     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10433     llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10434 
10435     auto RI = CS.getCapturedRecordDecl()->field_begin();
10436     auto *CV = CapturedVars.begin();
10437     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10438                                               CE = CS.capture_end();
10439          CI != CE; ++CI, ++RI, ++CV) {
10440       MappableExprsHandler::MapCombinedInfoTy CurInfo;
10441       MappableExprsHandler::StructRangeInfoTy PartialStruct;
10442 
10443       // VLA sizes are passed to the outlined region by copy and do not have map
10444       // information associated.
10445       if (CI->capturesVariableArrayType()) {
10446         CurInfo.Exprs.push_back(nullptr);
10447         CurInfo.BasePointers.push_back(*CV);
10448         CurInfo.Pointers.push_back(*CV);
10449         CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10450             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10451         // Copy to the device as an argument. No need to retrieve it.
10452         CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
10453                                 MappableExprsHandler::OMP_MAP_TARGET_PARAM |
10454                                 MappableExprsHandler::OMP_MAP_IMPLICIT);
10455         CurInfo.Mappers.push_back(nullptr);
10456       } else {
10457         // If we have any information in the map clause, we use it, otherwise we
10458         // just do a default mapping.
10459         MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
10460         if (!CI->capturesThis())
10461           MappedVarSet.insert(CI->getCapturedVar());
10462         else
10463           MappedVarSet.insert(nullptr);
10464         if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
10465           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10466         // Generate correct mapping for variables captured by reference in
10467         // lambdas.
10468         if (CI->capturesVariable())
10469           MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10470                                                   CurInfo, LambdaPointers);
10471       }
10472       // We expect to have at least an element of information for this capture.
10473       assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
10474              "Non-existing map pointer for capture!");
10475       assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10476              CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10477              CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10478              CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10479              "Inconsistent map information sizes!");
10480 
10481       // If there is an entry in PartialStruct it means we have a struct with
10482       // individual members mapped. Emit an extra combined entry.
10483       if (PartialStruct.Base.isValid()) {
10484         CombinedInfo.append(PartialStruct.PreliminaryMapData);
10485         MEHandler.emitCombinedEntry(
10486             CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
10487             !PartialStruct.PreliminaryMapData.BasePointers.empty());
10488       }
10489 
10490       // We need to append the results of this capture to what we already have.
10491       CombinedInfo.append(CurInfo);
10492     }
10493     // Adjust MEMBER_OF flags for the lambdas captures.
10494     MEHandler.adjustMemberOfForLambdaCaptures(
10495         LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
10496         CombinedInfo.Types);
10497     // Map any list items in a map clause that were not captures because they
10498     // weren't referenced within the construct.
10499     MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
10500 
10501     TargetDataInfo Info;
10502     // Fill up the arrays and create the arguments.
10503     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
10504     emitOffloadingArraysArgument(
10505         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
10506         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
10507         {/*ForEndCall=*/false});
10508 
10509     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10510     InputInfo.BasePointersArray =
10511         Address(Info.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10512     InputInfo.PointersArray =
10513         Address(Info.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10514     InputInfo.SizesArray =
10515         Address(Info.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10516     InputInfo.MappersArray =
10517         Address(Info.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10518     MapTypesArray = Info.MapTypesArray;
10519     MapNamesArray = Info.MapNamesArray;
10520     if (RequiresOuterTask)
10521       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10522     else
10523       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10524   };
10525 
10526   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10527                              CodeGenFunction &CGF, PrePostActionTy &) {
10528     if (RequiresOuterTask) {
10529       CodeGenFunction::OMPTargetDataInfo InputInfo;
10530       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10531     } else {
10532       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10533     }
10534   };
10535 
10536   // If we have a target function ID it means that we need to support
10537   // offloading, otherwise, just execute on the host. We need to execute on host
10538   // regardless of the conditional in the if clause if, e.g., the user do not
10539   // specify target triples.
10540   if (OutlinedFnID) {
10541     if (IfCond) {
10542       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10543     } else {
10544       RegionCodeGenTy ThenRCG(TargetThenGen);
10545       ThenRCG(CGF);
10546     }
10547   } else {
10548     RegionCodeGenTy ElseRCG(TargetElseGen);
10549     ElseRCG(CGF);
10550   }
10551 }
10552 
10553 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10554                                                     StringRef ParentName) {
10555   if (!S)
10556     return;
10557 
10558   // Codegen OMP target directives that offload compute to the device.
10559   bool RequiresDeviceCodegen =
10560       isa<OMPExecutableDirective>(S) &&
10561       isOpenMPTargetExecutionDirective(
10562           cast<OMPExecutableDirective>(S)->getDirectiveKind());
10563 
10564   if (RequiresDeviceCodegen) {
10565     const auto &E = *cast<OMPExecutableDirective>(S);
10566     unsigned DeviceID;
10567     unsigned FileID;
10568     unsigned Line;
10569     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
10570                              FileID, Line);
10571 
10572     // Is this a target region that should not be emitted as an entry point? If
10573     // so just signal we are done with this target region.
10574     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
10575                                                             ParentName, Line))
10576       return;
10577 
10578     switch (E.getDirectiveKind()) {
10579     case OMPD_target:
10580       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
10581                                                    cast<OMPTargetDirective>(E));
10582       break;
10583     case OMPD_target_parallel:
10584       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
10585           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
10586       break;
10587     case OMPD_target_teams:
10588       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
10589           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
10590       break;
10591     case OMPD_target_teams_distribute:
10592       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
10593           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
10594       break;
10595     case OMPD_target_teams_distribute_simd:
10596       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
10597           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
10598       break;
10599     case OMPD_target_parallel_for:
10600       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
10601           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
10602       break;
10603     case OMPD_target_parallel_for_simd:
10604       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
10605           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
10606       break;
10607     case OMPD_target_simd:
10608       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
10609           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
10610       break;
10611     case OMPD_target_teams_distribute_parallel_for:
10612       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
10613           CGM, ParentName,
10614           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
10615       break;
10616     case OMPD_target_teams_distribute_parallel_for_simd:
10617       CodeGenFunction::
10618           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
10619               CGM, ParentName,
10620               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
10621       break;
10622     case OMPD_parallel:
10623     case OMPD_for:
10624     case OMPD_parallel_for:
10625     case OMPD_parallel_master:
10626     case OMPD_parallel_sections:
10627     case OMPD_for_simd:
10628     case OMPD_parallel_for_simd:
10629     case OMPD_cancel:
10630     case OMPD_cancellation_point:
10631     case OMPD_ordered:
10632     case OMPD_threadprivate:
10633     case OMPD_allocate:
10634     case OMPD_task:
10635     case OMPD_simd:
10636     case OMPD_tile:
10637     case OMPD_unroll:
10638     case OMPD_sections:
10639     case OMPD_section:
10640     case OMPD_single:
10641     case OMPD_master:
10642     case OMPD_critical:
10643     case OMPD_taskyield:
10644     case OMPD_barrier:
10645     case OMPD_taskwait:
10646     case OMPD_taskgroup:
10647     case OMPD_atomic:
10648     case OMPD_flush:
10649     case OMPD_depobj:
10650     case OMPD_scan:
10651     case OMPD_teams:
10652     case OMPD_target_data:
10653     case OMPD_target_exit_data:
10654     case OMPD_target_enter_data:
10655     case OMPD_distribute:
10656     case OMPD_distribute_simd:
10657     case OMPD_distribute_parallel_for:
10658     case OMPD_distribute_parallel_for_simd:
10659     case OMPD_teams_distribute:
10660     case OMPD_teams_distribute_simd:
10661     case OMPD_teams_distribute_parallel_for:
10662     case OMPD_teams_distribute_parallel_for_simd:
10663     case OMPD_target_update:
10664     case OMPD_declare_simd:
10665     case OMPD_declare_variant:
10666     case OMPD_begin_declare_variant:
10667     case OMPD_end_declare_variant:
10668     case OMPD_declare_target:
10669     case OMPD_end_declare_target:
10670     case OMPD_declare_reduction:
10671     case OMPD_declare_mapper:
10672     case OMPD_taskloop:
10673     case OMPD_taskloop_simd:
10674     case OMPD_master_taskloop:
10675     case OMPD_master_taskloop_simd:
10676     case OMPD_parallel_master_taskloop:
10677     case OMPD_parallel_master_taskloop_simd:
10678     case OMPD_requires:
10679     case OMPD_metadirective:
10680     case OMPD_unknown:
10681     default:
10682       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10683     }
10684     return;
10685   }
10686 
10687   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10688     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10689       return;
10690 
10691     scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10692     return;
10693   }
10694 
10695   // If this is a lambda function, look into its body.
10696   if (const auto *L = dyn_cast<LambdaExpr>(S))
10697     S = L->getBody();
10698 
10699   // Keep looking for target regions recursively.
10700   for (const Stmt *II : S->children())
10701     scanForTargetRegionsFunctions(II, ParentName);
10702 }
10703 
10704 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10705   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10706       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10707   if (!DevTy)
10708     return false;
10709   // Do not emit device_type(nohost) functions for the host.
10710   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10711     return true;
10712   // Do not emit device_type(host) functions for the device.
10713   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10714     return true;
10715   return false;
10716 }
10717 
10718 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10719   // If emitting code for the host, we do not process FD here. Instead we do
10720   // the normal code generation.
10721   if (!CGM.getLangOpts().OpenMPIsDevice) {
10722     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10723       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10724                                   CGM.getLangOpts().OpenMPIsDevice))
10725         return true;
10726     return false;
10727   }
10728 
10729   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10730   // Try to detect target regions in the function.
10731   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10732     StringRef Name = CGM.getMangledName(GD);
10733     scanForTargetRegionsFunctions(FD->getBody(), Name);
10734     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10735                                 CGM.getLangOpts().OpenMPIsDevice))
10736       return true;
10737   }
10738 
10739   // Do not to emit function if it is not marked as declare target.
10740   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10741          AlreadyEmittedTargetDecls.count(VD) == 0;
10742 }
10743 
10744 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10745   if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10746                               CGM.getLangOpts().OpenMPIsDevice))
10747     return true;
10748 
10749   if (!CGM.getLangOpts().OpenMPIsDevice)
10750     return false;
10751 
10752   // Check if there are Ctors/Dtors in this declaration and look for target
10753   // regions in it. We use the complete variant to produce the kernel name
10754   // mangling.
10755   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10756   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10757     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10758       StringRef ParentName =
10759           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10760       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10761     }
10762     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10763       StringRef ParentName =
10764           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10765       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10766     }
10767   }
10768 
10769   // Do not to emit variable if it is not marked as declare target.
10770   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10771       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10772           cast<VarDecl>(GD.getDecl()));
10773   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10774       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10775        HasRequiresUnifiedSharedMemory)) {
10776     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10777     return true;
10778   }
10779   return false;
10780 }
10781 
10782 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10783                                                    llvm::Constant *Addr) {
10784   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10785       !CGM.getLangOpts().OpenMPIsDevice)
10786     return;
10787 
10788   // If we have host/nohost variables, they do not need to be registered.
10789   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10790       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10791   if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
10792     return;
10793 
10794   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10795       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10796   if (!Res) {
10797     if (CGM.getLangOpts().OpenMPIsDevice) {
10798       // Register non-target variables being emitted in device code (debug info
10799       // may cause this).
10800       StringRef VarName = CGM.getMangledName(VD);
10801       EmittedNonTargetVariables.try_emplace(VarName, Addr);
10802     }
10803     return;
10804   }
10805   // Register declare target variables.
10806   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10807   StringRef VarName;
10808   CharUnits VarSize;
10809   llvm::GlobalValue::LinkageTypes Linkage;
10810 
10811   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10812       !HasRequiresUnifiedSharedMemory) {
10813     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10814     VarName = CGM.getMangledName(VD);
10815     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10816       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10817       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10818     } else {
10819       VarSize = CharUnits::Zero();
10820     }
10821     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10822     // Temp solution to prevent optimizations of the internal variables.
10823     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10824       // Do not create a "ref-variable" if the original is not also available
10825       // on the host.
10826       if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
10827         return;
10828       std::string RefName = getName({VarName, "ref"});
10829       if (!CGM.GetGlobalValue(RefName)) {
10830         llvm::Constant *AddrRef =
10831             getOrCreateInternalVariable(Addr->getType(), RefName);
10832         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10833         GVAddrRef->setConstant(/*Val=*/true);
10834         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10835         GVAddrRef->setInitializer(Addr);
10836         CGM.addCompilerUsedGlobal(GVAddrRef);
10837       }
10838     }
10839   } else {
10840     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10841             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10842              HasRequiresUnifiedSharedMemory)) &&
10843            "Declare target attribute must link or to with unified memory.");
10844     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10845       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10846     else
10847       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10848 
10849     if (CGM.getLangOpts().OpenMPIsDevice) {
10850       VarName = Addr->getName();
10851       Addr = nullptr;
10852     } else {
10853       VarName = getAddrOfDeclareTargetVar(VD).getName();
10854       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10855     }
10856     VarSize = CGM.getPointerSize();
10857     Linkage = llvm::GlobalValue::WeakAnyLinkage;
10858   }
10859 
10860   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10861       VarName, Addr, VarSize, Flags, Linkage);
10862 }
10863 
10864 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10865   if (isa<FunctionDecl>(GD.getDecl()) ||
10866       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10867     return emitTargetFunctions(GD);
10868 
10869   return emitTargetGlobalVariable(GD);
10870 }
10871 
10872 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10873   for (const VarDecl *VD : DeferredGlobalVariables) {
10874     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10875         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10876     if (!Res)
10877       continue;
10878     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10879         !HasRequiresUnifiedSharedMemory) {
10880       CGM.EmitGlobal(VD);
10881     } else {
10882       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10883               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10884                HasRequiresUnifiedSharedMemory)) &&
10885              "Expected link clause or to clause with unified memory.");
10886       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10887     }
10888   }
10889 }
10890 
10891 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10892     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10893   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10894          " Expected target-based directive.");
10895 }
10896 
10897 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10898   for (const OMPClause *Clause : D->clauselists()) {
10899     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10900       HasRequiresUnifiedSharedMemory = true;
10901     } else if (const auto *AC =
10902                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10903       switch (AC->getAtomicDefaultMemOrderKind()) {
10904       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10905         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10906         break;
10907       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10908         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10909         break;
10910       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10911         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10912         break;
10913       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10914         break;
10915       }
10916     }
10917   }
10918 }
10919 
10920 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
10921   return RequiresAtomicOrdering;
10922 }
10923 
10924 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10925                                                        LangAS &AS) {
10926   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10927     return false;
10928   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10929   switch(A->getAllocatorType()) {
10930   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10931   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10932   // Not supported, fallback to the default mem space.
10933   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10934   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10935   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10936   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10937   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10938   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10939   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10940     AS = LangAS::Default;
10941     return true;
10942   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10943     llvm_unreachable("Expected predefined allocator for the variables with the "
10944                      "static storage.");
10945   }
10946   return false;
10947 }
10948 
10949 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
10950   return HasRequiresUnifiedSharedMemory;
10951 }
10952 
10953 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10954     CodeGenModule &CGM)
10955     : CGM(CGM) {
10956   if (CGM.getLangOpts().OpenMPIsDevice) {
10957     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10958     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10959   }
10960 }
10961 
10962 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10963   if (CGM.getLangOpts().OpenMPIsDevice)
10964     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10965 }
10966 
10967 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10968   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10969     return true;
10970 
10971   const auto *D = cast<FunctionDecl>(GD.getDecl());
10972   // Do not to emit function if it is marked as declare target as it was already
10973   // emitted.
10974   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10975     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10976       if (auto *F = dyn_cast_or_null<llvm::Function>(
10977               CGM.GetGlobalValue(CGM.getMangledName(GD))))
10978         return !F->isDeclaration();
10979       return false;
10980     }
10981     return true;
10982   }
10983 
10984   return !AlreadyEmittedTargetDecls.insert(D).second;
10985 }
10986 
10987 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10988   // If we don't have entries or if we are emitting code for the device, we
10989   // don't need to do anything.
10990   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10991       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
10992       (OffloadEntriesInfoManager.empty() &&
10993        !HasEmittedDeclareTargetRegion &&
10994        !HasEmittedTargetRegion))
10995     return nullptr;
10996 
10997   // Create and register the function that handles the requires directives.
10998   ASTContext &C = CGM.getContext();
10999 
11000   llvm::Function *RequiresRegFn;
11001   {
11002     CodeGenFunction CGF(CGM);
11003     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
11004     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
11005     std::string ReqName = getName({"omp_offloading", "requires_reg"});
11006     RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
11007     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
11008     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
11009     // TODO: check for other requires clauses.
11010     // The requires directive takes effect only when a target region is
11011     // present in the compilation unit. Otherwise it is ignored and not
11012     // passed to the runtime. This avoids the runtime from throwing an error
11013     // for mismatching requires clauses across compilation units that don't
11014     // contain at least 1 target region.
11015     assert((HasEmittedTargetRegion ||
11016             HasEmittedDeclareTargetRegion ||
11017             !OffloadEntriesInfoManager.empty()) &&
11018            "Target or declare target region expected.");
11019     if (HasRequiresUnifiedSharedMemory)
11020       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
11021     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11022                             CGM.getModule(), OMPRTL___tgt_register_requires),
11023                         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
11024     CGF.FinishFunction();
11025   }
11026   return RequiresRegFn;
11027 }
11028 
11029 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
11030                                     const OMPExecutableDirective &D,
11031                                     SourceLocation Loc,
11032                                     llvm::Function *OutlinedFn,
11033                                     ArrayRef<llvm::Value *> CapturedVars) {
11034   if (!CGF.HaveInsertPoint())
11035     return;
11036 
11037   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11038   CodeGenFunction::RunCleanupsScope Scope(CGF);
11039 
11040   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11041   llvm::Value *Args[] = {
11042       RTLoc,
11043       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11044       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
11045   llvm::SmallVector<llvm::Value *, 16> RealArgs;
11046   RealArgs.append(std::begin(Args), std::end(Args));
11047   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11048 
11049   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11050       CGM.getModule(), OMPRTL___kmpc_fork_teams);
11051   CGF.EmitRuntimeCall(RTLFn, RealArgs);
11052 }
11053 
11054 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11055                                          const Expr *NumTeams,
11056                                          const Expr *ThreadLimit,
11057                                          SourceLocation Loc) {
11058   if (!CGF.HaveInsertPoint())
11059     return;
11060 
11061   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11062 
11063   llvm::Value *NumTeamsVal =
11064       NumTeams
11065           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
11066                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11067           : CGF.Builder.getInt32(0);
11068 
11069   llvm::Value *ThreadLimitVal =
11070       ThreadLimit
11071           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11072                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11073           : CGF.Builder.getInt32(0);
11074 
11075   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
11076   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
11077                                      ThreadLimitVal};
11078   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11079                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
11080                       PushNumTeamsArgs);
11081 }
11082 
11083 void CGOpenMPRuntime::emitTargetDataCalls(
11084     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11085     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
11086   if (!CGF.HaveInsertPoint())
11087     return;
11088 
11089   // Action used to replace the default codegen action and turn privatization
11090   // off.
11091   PrePostActionTy NoPrivAction;
11092 
11093   // Generate the code for the opening of the data environment. Capture all the
11094   // arguments of the runtime call by reference because they are used in the
11095   // closing of the region.
11096   auto &&BeginThenGen = [this, &D, Device, &Info,
11097                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
11098     // Fill up the arrays with all the mapped variables.
11099     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11100 
11101     // Get map clause information.
11102     MappableExprsHandler MEHandler(D, CGF);
11103     MEHandler.generateAllInfo(CombinedInfo);
11104 
11105     // Fill up the arrays and create the arguments.
11106     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11107                          /*IsNonContiguous=*/true);
11108 
11109     llvm::Value *BasePointersArrayArg = nullptr;
11110     llvm::Value *PointersArrayArg = nullptr;
11111     llvm::Value *SizesArrayArg = nullptr;
11112     llvm::Value *MapTypesArrayArg = nullptr;
11113     llvm::Value *MapNamesArrayArg = nullptr;
11114     llvm::Value *MappersArrayArg = nullptr;
11115     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11116                                  SizesArrayArg, MapTypesArrayArg,
11117                                  MapNamesArrayArg, MappersArrayArg, Info);
11118 
11119     // Emit device ID if any.
11120     llvm::Value *DeviceID = nullptr;
11121     if (Device) {
11122       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11123                                            CGF.Int64Ty, /*isSigned=*/true);
11124     } else {
11125       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11126     }
11127 
11128     // Emit the number of elements in the offloading arrays.
11129     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11130     //
11131     // Source location for the ident struct
11132     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11133 
11134     llvm::Value *OffloadingArgs[] = {RTLoc,
11135                                      DeviceID,
11136                                      PointerNum,
11137                                      BasePointersArrayArg,
11138                                      PointersArrayArg,
11139                                      SizesArrayArg,
11140                                      MapTypesArrayArg,
11141                                      MapNamesArrayArg,
11142                                      MappersArrayArg};
11143     CGF.EmitRuntimeCall(
11144         OMPBuilder.getOrCreateRuntimeFunction(
11145             CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
11146         OffloadingArgs);
11147 
11148     // If device pointer privatization is required, emit the body of the region
11149     // here. It will have to be duplicated: with and without privatization.
11150     if (!Info.CaptureDeviceAddrMap.empty())
11151       CodeGen(CGF);
11152   };
11153 
11154   // Generate code for the closing of the data region.
11155   auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
11156                                                 PrePostActionTy &) {
11157     assert(Info.isValid() && "Invalid data environment closing arguments.");
11158 
11159     llvm::Value *BasePointersArrayArg = nullptr;
11160     llvm::Value *PointersArrayArg = nullptr;
11161     llvm::Value *SizesArrayArg = nullptr;
11162     llvm::Value *MapTypesArrayArg = nullptr;
11163     llvm::Value *MapNamesArrayArg = nullptr;
11164     llvm::Value *MappersArrayArg = nullptr;
11165     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11166                                  SizesArrayArg, MapTypesArrayArg,
11167                                  MapNamesArrayArg, MappersArrayArg, Info,
11168                                  {/*ForEndCall=*/true});
11169 
11170     // Emit device ID if any.
11171     llvm::Value *DeviceID = nullptr;
11172     if (Device) {
11173       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11174                                            CGF.Int64Ty, /*isSigned=*/true);
11175     } else {
11176       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11177     }
11178 
11179     // Emit the number of elements in the offloading arrays.
11180     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11181 
11182     // Source location for the ident struct
11183     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11184 
11185     llvm::Value *OffloadingArgs[] = {RTLoc,
11186                                      DeviceID,
11187                                      PointerNum,
11188                                      BasePointersArrayArg,
11189                                      PointersArrayArg,
11190                                      SizesArrayArg,
11191                                      MapTypesArrayArg,
11192                                      MapNamesArrayArg,
11193                                      MappersArrayArg};
11194     CGF.EmitRuntimeCall(
11195         OMPBuilder.getOrCreateRuntimeFunction(
11196             CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
11197         OffloadingArgs);
11198   };
11199 
11200   // If we need device pointer privatization, we need to emit the body of the
11201   // region with no privatization in the 'else' branch of the conditional.
11202   // Otherwise, we don't have to do anything.
11203   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
11204                                                          PrePostActionTy &) {
11205     if (!Info.CaptureDeviceAddrMap.empty()) {
11206       CodeGen.setAction(NoPrivAction);
11207       CodeGen(CGF);
11208     }
11209   };
11210 
11211   // We don't have to do anything to close the region if the if clause evaluates
11212   // to false.
11213   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
11214 
11215   if (IfCond) {
11216     emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
11217   } else {
11218     RegionCodeGenTy RCG(BeginThenGen);
11219     RCG(CGF);
11220   }
11221 
11222   // If we don't require privatization of device pointers, we emit the body in
11223   // between the runtime calls. This avoids duplicating the body code.
11224   if (Info.CaptureDeviceAddrMap.empty()) {
11225     CodeGen.setAction(NoPrivAction);
11226     CodeGen(CGF);
11227   }
11228 
11229   if (IfCond) {
11230     emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
11231   } else {
11232     RegionCodeGenTy RCG(EndThenGen);
11233     RCG(CGF);
11234   }
11235 }
11236 
11237 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
11238     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11239     const Expr *Device) {
11240   if (!CGF.HaveInsertPoint())
11241     return;
11242 
11243   assert((isa<OMPTargetEnterDataDirective>(D) ||
11244           isa<OMPTargetExitDataDirective>(D) ||
11245           isa<OMPTargetUpdateDirective>(D)) &&
11246          "Expecting either target enter, exit data, or update directives.");
11247 
11248   CodeGenFunction::OMPTargetDataInfo InputInfo;
11249   llvm::Value *MapTypesArray = nullptr;
11250   llvm::Value *MapNamesArray = nullptr;
11251   // Generate the code for the opening of the data environment.
11252   auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
11253                     &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
11254     // Emit device ID if any.
11255     llvm::Value *DeviceID = nullptr;
11256     if (Device) {
11257       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11258                                            CGF.Int64Ty, /*isSigned=*/true);
11259     } else {
11260       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11261     }
11262 
11263     // Emit the number of elements in the offloading arrays.
11264     llvm::Constant *PointerNum =
11265         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
11266 
11267     // Source location for the ident struct
11268     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11269 
11270     llvm::Value *OffloadingArgs[] = {RTLoc,
11271                                      DeviceID,
11272                                      PointerNum,
11273                                      InputInfo.BasePointersArray.getPointer(),
11274                                      InputInfo.PointersArray.getPointer(),
11275                                      InputInfo.SizesArray.getPointer(),
11276                                      MapTypesArray,
11277                                      MapNamesArray,
11278                                      InputInfo.MappersArray.getPointer()};
11279 
11280     // Select the right runtime function call for each standalone
11281     // directive.
11282     const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
11283     RuntimeFunction RTLFn;
11284     switch (D.getDirectiveKind()) {
11285     case OMPD_target_enter_data:
11286       RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
11287                         : OMPRTL___tgt_target_data_begin_mapper;
11288       break;
11289     case OMPD_target_exit_data:
11290       RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
11291                         : OMPRTL___tgt_target_data_end_mapper;
11292       break;
11293     case OMPD_target_update:
11294       RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
11295                         : OMPRTL___tgt_target_data_update_mapper;
11296       break;
11297     case OMPD_parallel:
11298     case OMPD_for:
11299     case OMPD_parallel_for:
11300     case OMPD_parallel_master:
11301     case OMPD_parallel_sections:
11302     case OMPD_for_simd:
11303     case OMPD_parallel_for_simd:
11304     case OMPD_cancel:
11305     case OMPD_cancellation_point:
11306     case OMPD_ordered:
11307     case OMPD_threadprivate:
11308     case OMPD_allocate:
11309     case OMPD_task:
11310     case OMPD_simd:
11311     case OMPD_tile:
11312     case OMPD_unroll:
11313     case OMPD_sections:
11314     case OMPD_section:
11315     case OMPD_single:
11316     case OMPD_master:
11317     case OMPD_critical:
11318     case OMPD_taskyield:
11319     case OMPD_barrier:
11320     case OMPD_taskwait:
11321     case OMPD_taskgroup:
11322     case OMPD_atomic:
11323     case OMPD_flush:
11324     case OMPD_depobj:
11325     case OMPD_scan:
11326     case OMPD_teams:
11327     case OMPD_target_data:
11328     case OMPD_distribute:
11329     case OMPD_distribute_simd:
11330     case OMPD_distribute_parallel_for:
11331     case OMPD_distribute_parallel_for_simd:
11332     case OMPD_teams_distribute:
11333     case OMPD_teams_distribute_simd:
11334     case OMPD_teams_distribute_parallel_for:
11335     case OMPD_teams_distribute_parallel_for_simd:
11336     case OMPD_declare_simd:
11337     case OMPD_declare_variant:
11338     case OMPD_begin_declare_variant:
11339     case OMPD_end_declare_variant:
11340     case OMPD_declare_target:
11341     case OMPD_end_declare_target:
11342     case OMPD_declare_reduction:
11343     case OMPD_declare_mapper:
11344     case OMPD_taskloop:
11345     case OMPD_taskloop_simd:
11346     case OMPD_master_taskloop:
11347     case OMPD_master_taskloop_simd:
11348     case OMPD_parallel_master_taskloop:
11349     case OMPD_parallel_master_taskloop_simd:
11350     case OMPD_target:
11351     case OMPD_target_simd:
11352     case OMPD_target_teams_distribute:
11353     case OMPD_target_teams_distribute_simd:
11354     case OMPD_target_teams_distribute_parallel_for:
11355     case OMPD_target_teams_distribute_parallel_for_simd:
11356     case OMPD_target_teams:
11357     case OMPD_target_parallel:
11358     case OMPD_target_parallel_for:
11359     case OMPD_target_parallel_for_simd:
11360     case OMPD_requires:
11361     case OMPD_metadirective:
11362     case OMPD_unknown:
11363     default:
11364       llvm_unreachable("Unexpected standalone target data directive.");
11365       break;
11366     }
11367     CGF.EmitRuntimeCall(
11368         OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
11369         OffloadingArgs);
11370   };
11371 
11372   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
11373                           &MapNamesArray](CodeGenFunction &CGF,
11374                                           PrePostActionTy &) {
11375     // Fill up the arrays with all the mapped variables.
11376     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11377 
11378     // Get map clause information.
11379     MappableExprsHandler MEHandler(D, CGF);
11380     MEHandler.generateAllInfo(CombinedInfo);
11381 
11382     TargetDataInfo Info;
11383     // Fill up the arrays and create the arguments.
11384     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11385                          /*IsNonContiguous=*/true);
11386     bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
11387                              D.hasClausesOfKind<OMPNowaitClause>();
11388     emitOffloadingArraysArgument(
11389         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
11390         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
11391         {/*ForEndCall=*/false});
11392     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
11393     InputInfo.BasePointersArray =
11394         Address(Info.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
11395     InputInfo.PointersArray =
11396         Address(Info.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
11397     InputInfo.SizesArray =
11398         Address(Info.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
11399     InputInfo.MappersArray =
11400         Address(Info.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
11401     MapTypesArray = Info.MapTypesArray;
11402     MapNamesArray = Info.MapNamesArray;
11403     if (RequiresOuterTask)
11404       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
11405     else
11406       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
11407   };
11408 
11409   if (IfCond) {
11410     emitIfClause(CGF, IfCond, TargetThenGen,
11411                  [](CodeGenFunction &CGF, PrePostActionTy &) {});
11412   } else {
11413     RegionCodeGenTy ThenRCG(TargetThenGen);
11414     ThenRCG(CGF);
11415   }
11416 }
11417 
11418 namespace {
11419   /// Kind of parameter in a function with 'declare simd' directive.
11420 enum ParamKindTy {
11421   Linear,
11422   LinearRef,
11423   LinearUVal,
11424   LinearVal,
11425   Uniform,
11426   Vector,
11427 };
11428 /// Attribute set of the parameter.
11429 struct ParamAttrTy {
11430   ParamKindTy Kind = Vector;
11431   llvm::APSInt StrideOrArg;
11432   llvm::APSInt Alignment;
11433   bool HasVarStride = false;
11434 };
11435 } // namespace
11436 
11437 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11438                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11439   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11440   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11441   // of that clause. The VLEN value must be power of 2.
11442   // In other case the notion of the function`s "characteristic data type" (CDT)
11443   // is used to compute the vector length.
11444   // CDT is defined in the following order:
11445   //   a) For non-void function, the CDT is the return type.
11446   //   b) If the function has any non-uniform, non-linear parameters, then the
11447   //   CDT is the type of the first such parameter.
11448   //   c) If the CDT determined by a) or b) above is struct, union, or class
11449   //   type which is pass-by-value (except for the type that maps to the
11450   //   built-in complex data type), the characteristic data type is int.
11451   //   d) If none of the above three cases is applicable, the CDT is int.
11452   // The VLEN is then determined based on the CDT and the size of vector
11453   // register of that ISA for which current vector version is generated. The
11454   // VLEN is computed using the formula below:
11455   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11456   // where vector register size specified in section 3.2.1 Registers and the
11457   // Stack Frame of original AMD64 ABI document.
11458   QualType RetType = FD->getReturnType();
11459   if (RetType.isNull())
11460     return 0;
11461   ASTContext &C = FD->getASTContext();
11462   QualType CDT;
11463   if (!RetType.isNull() && !RetType->isVoidType()) {
11464     CDT = RetType;
11465   } else {
11466     unsigned Offset = 0;
11467     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11468       if (ParamAttrs[Offset].Kind == Vector)
11469         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11470       ++Offset;
11471     }
11472     if (CDT.isNull()) {
11473       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11474         if (ParamAttrs[I + Offset].Kind == Vector) {
11475           CDT = FD->getParamDecl(I)->getType();
11476           break;
11477         }
11478       }
11479     }
11480   }
11481   if (CDT.isNull())
11482     CDT = C.IntTy;
11483   CDT = CDT->getCanonicalTypeUnqualified();
11484   if (CDT->isRecordType() || CDT->isUnionType())
11485     CDT = C.IntTy;
11486   return C.getTypeSize(CDT);
11487 }
11488 
11489 /// Mangle the parameter part of the vector function name according to
11490 /// their OpenMP classification. The mangling function is defined in
11491 /// section 4.5 of the AAVFABI(2021Q1).
11492 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11493   SmallString<256> Buffer;
11494   llvm::raw_svector_ostream Out(Buffer);
11495   for (const auto &ParamAttr : ParamAttrs) {
11496     switch (ParamAttr.Kind) {
11497     case Linear:
11498       Out << 'l';
11499       break;
11500     case LinearRef:
11501       Out << 'R';
11502       break;
11503     case LinearUVal:
11504       Out << 'U';
11505       break;
11506     case LinearVal:
11507       Out << 'L';
11508       break;
11509     case Uniform:
11510       Out << 'u';
11511       break;
11512     case Vector:
11513       Out << 'v';
11514       break;
11515     }
11516     if (ParamAttr.HasVarStride)
11517       Out << "s" << ParamAttr.StrideOrArg;
11518     else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
11519              ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
11520       // Don't print the step value if it is not present or if it is
11521       // equal to 1.
11522       if (ParamAttr.StrideOrArg < 0)
11523         Out << 'n' << -ParamAttr.StrideOrArg;
11524       else if (ParamAttr.StrideOrArg != 1)
11525         Out << ParamAttr.StrideOrArg;
11526     }
11527 
11528     if (!!ParamAttr.Alignment)
11529       Out << 'a' << ParamAttr.Alignment;
11530   }
11531 
11532   return std::string(Out.str());
11533 }
11534 
11535 static void
11536 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11537                            const llvm::APSInt &VLENVal,
11538                            ArrayRef<ParamAttrTy> ParamAttrs,
11539                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11540   struct ISADataTy {
11541     char ISA;
11542     unsigned VecRegSize;
11543   };
11544   ISADataTy ISAData[] = {
11545       {
11546           'b', 128
11547       }, // SSE
11548       {
11549           'c', 256
11550       }, // AVX
11551       {
11552           'd', 256
11553       }, // AVX2
11554       {
11555           'e', 512
11556       }, // AVX512
11557   };
11558   llvm::SmallVector<char, 2> Masked;
11559   switch (State) {
11560   case OMPDeclareSimdDeclAttr::BS_Undefined:
11561     Masked.push_back('N');
11562     Masked.push_back('M');
11563     break;
11564   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11565     Masked.push_back('N');
11566     break;
11567   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11568     Masked.push_back('M');
11569     break;
11570   }
11571   for (char Mask : Masked) {
11572     for (const ISADataTy &Data : ISAData) {
11573       SmallString<256> Buffer;
11574       llvm::raw_svector_ostream Out(Buffer);
11575       Out << "_ZGV" << Data.ISA << Mask;
11576       if (!VLENVal) {
11577         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11578         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11579         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11580       } else {
11581         Out << VLENVal;
11582       }
11583       Out << mangleVectorParameters(ParamAttrs);
11584       Out << '_' << Fn->getName();
11585       Fn->addFnAttr(Out.str());
11586     }
11587   }
11588 }
11589 
11590 // This are the Functions that are needed to mangle the name of the
11591 // vector functions generated by the compiler, according to the rules
11592 // defined in the "Vector Function ABI specifications for AArch64",
11593 // available at
11594 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11595 
11596 /// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
11597 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11598   QT = QT.getCanonicalType();
11599 
11600   if (QT->isVoidType())
11601     return false;
11602 
11603   if (Kind == ParamKindTy::Uniform)
11604     return false;
11605 
11606   if (Kind == ParamKindTy::LinearUVal || ParamKindTy::LinearRef)
11607     return false;
11608 
11609   if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
11610       !QT->isReferenceType())
11611     return false;
11612 
11613   return true;
11614 }
11615 
11616 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11617 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11618   QT = QT.getCanonicalType();
11619   unsigned Size = C.getTypeSize(QT);
11620 
11621   // Only scalars and complex within 16 bytes wide set PVB to true.
11622   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11623     return false;
11624 
11625   if (QT->isFloatingType())
11626     return true;
11627 
11628   if (QT->isIntegerType())
11629     return true;
11630 
11631   if (QT->isPointerType())
11632     return true;
11633 
11634   // TODO: Add support for complex types (section 3.1.2, item 2).
11635 
11636   return false;
11637 }
11638 
11639 /// Computes the lane size (LS) of a return type or of an input parameter,
11640 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11641 /// TODO: Add support for references, section 3.2.1, item 1.
11642 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11643   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11644     QualType PTy = QT.getCanonicalType()->getPointeeType();
11645     if (getAArch64PBV(PTy, C))
11646       return C.getTypeSize(PTy);
11647   }
11648   if (getAArch64PBV(QT, C))
11649     return C.getTypeSize(QT);
11650 
11651   return C.getTypeSize(C.getUIntPtrType());
11652 }
11653 
11654 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11655 // signature of the scalar function, as defined in 3.2.2 of the
11656 // AAVFABI.
11657 static std::tuple<unsigned, unsigned, bool>
11658 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11659   QualType RetType = FD->getReturnType().getCanonicalType();
11660 
11661   ASTContext &C = FD->getASTContext();
11662 
11663   bool OutputBecomesInput = false;
11664 
11665   llvm::SmallVector<unsigned, 8> Sizes;
11666   if (!RetType->isVoidType()) {
11667     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11668     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11669       OutputBecomesInput = true;
11670   }
11671   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11672     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11673     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11674   }
11675 
11676   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11677   // The LS of a function parameter / return value can only be a power
11678   // of 2, starting from 8 bits, up to 128.
11679   assert(llvm::all_of(Sizes,
11680                       [](unsigned Size) {
11681                         return Size == 8 || Size == 16 || Size == 32 ||
11682                                Size == 64 || Size == 128;
11683                       }) &&
11684          "Invalid size");
11685 
11686   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11687                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11688                          OutputBecomesInput);
11689 }
11690 
11691 // Function used to add the attribute. The parameter `VLEN` is
11692 // templated to allow the use of "x" when targeting scalable functions
11693 // for SVE.
11694 template <typename T>
11695 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11696                                  char ISA, StringRef ParSeq,
11697                                  StringRef MangledName, bool OutputBecomesInput,
11698                                  llvm::Function *Fn) {
11699   SmallString<256> Buffer;
11700   llvm::raw_svector_ostream Out(Buffer);
11701   Out << Prefix << ISA << LMask << VLEN;
11702   if (OutputBecomesInput)
11703     Out << "v";
11704   Out << ParSeq << "_" << MangledName;
11705   Fn->addFnAttr(Out.str());
11706 }
11707 
11708 // Helper function to generate the Advanced SIMD names depending on
11709 // the value of the NDS when simdlen is not present.
11710 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11711                                       StringRef Prefix, char ISA,
11712                                       StringRef ParSeq, StringRef MangledName,
11713                                       bool OutputBecomesInput,
11714                                       llvm::Function *Fn) {
11715   switch (NDS) {
11716   case 8:
11717     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11718                          OutputBecomesInput, Fn);
11719     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11720                          OutputBecomesInput, Fn);
11721     break;
11722   case 16:
11723     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11724                          OutputBecomesInput, Fn);
11725     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11726                          OutputBecomesInput, Fn);
11727     break;
11728   case 32:
11729     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11730                          OutputBecomesInput, Fn);
11731     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11732                          OutputBecomesInput, Fn);
11733     break;
11734   case 64:
11735   case 128:
11736     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11737                          OutputBecomesInput, Fn);
11738     break;
11739   default:
11740     llvm_unreachable("Scalar type is too wide.");
11741   }
11742 }
11743 
11744 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11745 static void emitAArch64DeclareSimdFunction(
11746     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11747     ArrayRef<ParamAttrTy> ParamAttrs,
11748     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11749     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11750 
11751   // Get basic data for building the vector signature.
11752   const auto Data = getNDSWDS(FD, ParamAttrs);
11753   const unsigned NDS = std::get<0>(Data);
11754   const unsigned WDS = std::get<1>(Data);
11755   const bool OutputBecomesInput = std::get<2>(Data);
11756 
11757   // Check the values provided via `simdlen` by the user.
11758   // 1. A `simdlen(1)` doesn't produce vector signatures,
11759   if (UserVLEN == 1) {
11760     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11761         DiagnosticsEngine::Warning,
11762         "The clause simdlen(1) has no effect when targeting aarch64.");
11763     CGM.getDiags().Report(SLoc, DiagID);
11764     return;
11765   }
11766 
11767   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11768   // Advanced SIMD output.
11769   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11770     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11771         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11772                                     "power of 2 when targeting Advanced SIMD.");
11773     CGM.getDiags().Report(SLoc, DiagID);
11774     return;
11775   }
11776 
11777   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
11778   // limits.
11779   if (ISA == 's' && UserVLEN != 0) {
11780     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11781       unsigned DiagID = CGM.getDiags().getCustomDiagID(
11782           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11783                                       "lanes in the architectural constraints "
11784                                       "for SVE (min is 128-bit, max is "
11785                                       "2048-bit, by steps of 128-bit)");
11786       CGM.getDiags().Report(SLoc, DiagID) << WDS;
11787       return;
11788     }
11789   }
11790 
11791   // Sort out parameter sequence.
11792   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11793   StringRef Prefix = "_ZGV";
11794   // Generate simdlen from user input (if any).
11795   if (UserVLEN) {
11796     if (ISA == 's') {
11797       // SVE generates only a masked function.
11798       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11799                            OutputBecomesInput, Fn);
11800     } else {
11801       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11802       // Advanced SIMD generates one or two functions, depending on
11803       // the `[not]inbranch` clause.
11804       switch (State) {
11805       case OMPDeclareSimdDeclAttr::BS_Undefined:
11806         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11807                              OutputBecomesInput, Fn);
11808         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11809                              OutputBecomesInput, Fn);
11810         break;
11811       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11812         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11813                              OutputBecomesInput, Fn);
11814         break;
11815       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11816         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11817                              OutputBecomesInput, Fn);
11818         break;
11819       }
11820     }
11821   } else {
11822     // If no user simdlen is provided, follow the AAVFABI rules for
11823     // generating the vector length.
11824     if (ISA == 's') {
11825       // SVE, section 3.4.1, item 1.
11826       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11827                            OutputBecomesInput, Fn);
11828     } else {
11829       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11830       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11831       // two vector names depending on the use of the clause
11832       // `[not]inbranch`.
11833       switch (State) {
11834       case OMPDeclareSimdDeclAttr::BS_Undefined:
11835         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11836                                   OutputBecomesInput, Fn);
11837         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11838                                   OutputBecomesInput, Fn);
11839         break;
11840       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11841         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11842                                   OutputBecomesInput, Fn);
11843         break;
11844       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11845         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11846                                   OutputBecomesInput, Fn);
11847         break;
11848       }
11849     }
11850   }
11851 }
11852 
11853 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11854                                               llvm::Function *Fn) {
11855   ASTContext &C = CGM.getContext();
11856   FD = FD->getMostRecentDecl();
11857   while (FD) {
11858     // Map params to their positions in function decl.
11859     llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11860     if (isa<CXXMethodDecl>(FD))
11861       ParamPositions.try_emplace(FD, 0);
11862     unsigned ParamPos = ParamPositions.size();
11863     for (const ParmVarDecl *P : FD->parameters()) {
11864       ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11865       ++ParamPos;
11866     }
11867     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11868       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11869       // Mark uniform parameters.
11870       for (const Expr *E : Attr->uniforms()) {
11871         E = E->IgnoreParenImpCasts();
11872         unsigned Pos;
11873         if (isa<CXXThisExpr>(E)) {
11874           Pos = ParamPositions[FD];
11875         } else {
11876           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11877                                 ->getCanonicalDecl();
11878           auto It = ParamPositions.find(PVD);
11879           assert(It != ParamPositions.end() && "Function parameter not found");
11880           Pos = It->second;
11881         }
11882         ParamAttrs[Pos].Kind = Uniform;
11883       }
11884       // Get alignment info.
11885       auto *NI = Attr->alignments_begin();
11886       for (const Expr *E : Attr->aligneds()) {
11887         E = E->IgnoreParenImpCasts();
11888         unsigned Pos;
11889         QualType ParmTy;
11890         if (isa<CXXThisExpr>(E)) {
11891           Pos = ParamPositions[FD];
11892           ParmTy = E->getType();
11893         } else {
11894           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11895                                 ->getCanonicalDecl();
11896           auto It = ParamPositions.find(PVD);
11897           assert(It != ParamPositions.end() && "Function parameter not found");
11898           Pos = It->second;
11899           ParmTy = PVD->getType();
11900         }
11901         ParamAttrs[Pos].Alignment =
11902             (*NI)
11903                 ? (*NI)->EvaluateKnownConstInt(C)
11904                 : llvm::APSInt::getUnsigned(
11905                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11906                           .getQuantity());
11907         ++NI;
11908       }
11909       // Mark linear parameters.
11910       auto *SI = Attr->steps_begin();
11911       auto *MI = Attr->modifiers_begin();
11912       for (const Expr *E : Attr->linears()) {
11913         E = E->IgnoreParenImpCasts();
11914         unsigned Pos;
11915         bool IsReferenceType = false;
11916         // Rescaling factor needed to compute the linear parameter
11917         // value in the mangled name.
11918         unsigned PtrRescalingFactor = 1;
11919         if (isa<CXXThisExpr>(E)) {
11920           Pos = ParamPositions[FD];
11921           auto *P = cast<PointerType>(E->getType());
11922           PtrRescalingFactor = CGM.getContext()
11923                                    .getTypeSizeInChars(P->getPointeeType())
11924                                    .getQuantity();
11925         } else {
11926           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11927                                 ->getCanonicalDecl();
11928           auto It = ParamPositions.find(PVD);
11929           assert(It != ParamPositions.end() && "Function parameter not found");
11930           Pos = It->second;
11931           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11932             PtrRescalingFactor = CGM.getContext()
11933                                      .getTypeSizeInChars(P->getPointeeType())
11934                                      .getQuantity();
11935           else if (PVD->getType()->isReferenceType()) {
11936             IsReferenceType = true;
11937             PtrRescalingFactor =
11938                 CGM.getContext()
11939                     .getTypeSizeInChars(PVD->getType().getNonReferenceType())
11940                     .getQuantity();
11941           }
11942         }
11943         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11944         if (*MI == OMPC_LINEAR_ref)
11945           ParamAttr.Kind = LinearRef;
11946         else if (*MI == OMPC_LINEAR_uval)
11947           ParamAttr.Kind = LinearUVal;
11948         else if (IsReferenceType)
11949           ParamAttr.Kind = LinearVal;
11950         else
11951           ParamAttr.Kind = Linear;
11952         // Assuming a stride of 1, for `linear` without modifiers.
11953         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11954         if (*SI) {
11955           Expr::EvalResult Result;
11956           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11957             if (const auto *DRE =
11958                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11959               if (const auto *StridePVD =
11960                       dyn_cast<ParmVarDecl>(DRE->getDecl())) {
11961                 ParamAttr.HasVarStride = true;
11962                 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
11963                 assert(It != ParamPositions.end() &&
11964                        "Function parameter not found");
11965                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
11966               }
11967             }
11968           } else {
11969             ParamAttr.StrideOrArg = Result.Val.getInt();
11970           }
11971         }
11972         // If we are using a linear clause on a pointer, we need to
11973         // rescale the value of linear_step with the byte size of the
11974         // pointee type.
11975         if (!ParamAttr.HasVarStride &&
11976             (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
11977           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11978         ++SI;
11979         ++MI;
11980       }
11981       llvm::APSInt VLENVal;
11982       SourceLocation ExprLoc;
11983       const Expr *VLENExpr = Attr->getSimdlen();
11984       if (VLENExpr) {
11985         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11986         ExprLoc = VLENExpr->getExprLoc();
11987       }
11988       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11989       if (CGM.getTriple().isX86()) {
11990         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11991       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11992         unsigned VLEN = VLENVal.getExtValue();
11993         StringRef MangledName = Fn->getName();
11994         if (CGM.getTarget().hasFeature("sve"))
11995           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11996                                          MangledName, 's', 128, Fn, ExprLoc);
11997         if (CGM.getTarget().hasFeature("neon"))
11998           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11999                                          MangledName, 'n', 128, Fn, ExprLoc);
12000       }
12001     }
12002     FD = FD->getPreviousDecl();
12003   }
12004 }
12005 
12006 namespace {
12007 /// Cleanup action for doacross support.
12008 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
12009 public:
12010   static const int DoacrossFinArgs = 2;
12011 
12012 private:
12013   llvm::FunctionCallee RTLFn;
12014   llvm::Value *Args[DoacrossFinArgs];
12015 
12016 public:
12017   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
12018                     ArrayRef<llvm::Value *> CallArgs)
12019       : RTLFn(RTLFn) {
12020     assert(CallArgs.size() == DoacrossFinArgs);
12021     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
12022   }
12023   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12024     if (!CGF.HaveInsertPoint())
12025       return;
12026     CGF.EmitRuntimeCall(RTLFn, Args);
12027   }
12028 };
12029 } // namespace
12030 
12031 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12032                                        const OMPLoopDirective &D,
12033                                        ArrayRef<Expr *> NumIterations) {
12034   if (!CGF.HaveInsertPoint())
12035     return;
12036 
12037   ASTContext &C = CGM.getContext();
12038   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
12039   RecordDecl *RD;
12040   if (KmpDimTy.isNull()) {
12041     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
12042     //  kmp_int64 lo; // lower
12043     //  kmp_int64 up; // upper
12044     //  kmp_int64 st; // stride
12045     // };
12046     RD = C.buildImplicitRecord("kmp_dim");
12047     RD->startDefinition();
12048     addFieldToRecordDecl(C, RD, Int64Ty);
12049     addFieldToRecordDecl(C, RD, Int64Ty);
12050     addFieldToRecordDecl(C, RD, Int64Ty);
12051     RD->completeDefinition();
12052     KmpDimTy = C.getRecordType(RD);
12053   } else {
12054     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
12055   }
12056   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
12057   QualType ArrayTy =
12058       C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
12059 
12060   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
12061   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
12062   enum { LowerFD = 0, UpperFD, StrideFD };
12063   // Fill dims with data.
12064   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
12065     LValue DimsLVal = CGF.MakeAddrLValue(
12066         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
12067     // dims.upper = num_iterations;
12068     LValue UpperLVal = CGF.EmitLValueForField(
12069         DimsLVal, *std::next(RD->field_begin(), UpperFD));
12070     llvm::Value *NumIterVal = CGF.EmitScalarConversion(
12071         CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
12072         Int64Ty, NumIterations[I]->getExprLoc());
12073     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
12074     // dims.stride = 1;
12075     LValue StrideLVal = CGF.EmitLValueForField(
12076         DimsLVal, *std::next(RD->field_begin(), StrideFD));
12077     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
12078                           StrideLVal);
12079   }
12080 
12081   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
12082   // kmp_int32 num_dims, struct kmp_dim * dims);
12083   llvm::Value *Args[] = {
12084       emitUpdateLocation(CGF, D.getBeginLoc()),
12085       getThreadID(CGF, D.getBeginLoc()),
12086       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
12087       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12088           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
12089           CGM.VoidPtrTy)};
12090 
12091   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12092       CGM.getModule(), OMPRTL___kmpc_doacross_init);
12093   CGF.EmitRuntimeCall(RTLFn, Args);
12094   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
12095       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
12096   llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12097       CGM.getModule(), OMPRTL___kmpc_doacross_fini);
12098   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
12099                                              llvm::makeArrayRef(FiniArgs));
12100 }
12101 
12102 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12103                                           const OMPDependClause *C) {
12104   QualType Int64Ty =
12105       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
12106   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
12107   QualType ArrayTy = CGM.getContext().getConstantArrayType(
12108       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
12109   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
12110   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
12111     const Expr *CounterVal = C->getLoopData(I);
12112     assert(CounterVal);
12113     llvm::Value *CntVal = CGF.EmitScalarConversion(
12114         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
12115         CounterVal->getExprLoc());
12116     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
12117                           /*Volatile=*/false, Int64Ty);
12118   }
12119   llvm::Value *Args[] = {
12120       emitUpdateLocation(CGF, C->getBeginLoc()),
12121       getThreadID(CGF, C->getBeginLoc()),
12122       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
12123   llvm::FunctionCallee RTLFn;
12124   if (C->getDependencyKind() == OMPC_DEPEND_source) {
12125     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12126                                                   OMPRTL___kmpc_doacross_post);
12127   } else {
12128     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
12129     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12130                                                   OMPRTL___kmpc_doacross_wait);
12131   }
12132   CGF.EmitRuntimeCall(RTLFn, Args);
12133 }
12134 
12135 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12136                                llvm::FunctionCallee Callee,
12137                                ArrayRef<llvm::Value *> Args) const {
12138   assert(Loc.isValid() && "Outlined function call location must be valid.");
12139   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
12140 
12141   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12142     if (Fn->doesNotThrow()) {
12143       CGF.EmitNounwindRuntimeCall(Fn, Args);
12144       return;
12145     }
12146   }
12147   CGF.EmitRuntimeCall(Callee, Args);
12148 }
12149 
12150 void CGOpenMPRuntime::emitOutlinedFunctionCall(
12151     CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
12152     ArrayRef<llvm::Value *> Args) const {
12153   emitCall(CGF, Loc, OutlinedFn, Args);
12154 }
12155 
12156 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12157   if (const auto *FD = dyn_cast<FunctionDecl>(D))
12158     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12159       HasEmittedDeclareTargetRegion = true;
12160 }
12161 
12162 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
12163                                              const VarDecl *NativeParam,
12164                                              const VarDecl *TargetParam) const {
12165   return CGF.GetAddrOfLocalVar(NativeParam);
12166 }
12167 
12168 /// Return allocator value from expression, or return a null allocator (default
12169 /// when no allocator specified).
12170 static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
12171                                     const Expr *Allocator) {
12172   llvm::Value *AllocVal;
12173   if (Allocator) {
12174     AllocVal = CGF.EmitScalarExpr(Allocator);
12175     // According to the standard, the original allocator type is a enum
12176     // (integer). Convert to pointer type, if required.
12177     AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
12178                                         CGF.getContext().VoidPtrTy,
12179                                         Allocator->getExprLoc());
12180   } else {
12181     // If no allocator specified, it defaults to the null allocator.
12182     AllocVal = llvm::Constant::getNullValue(
12183         CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
12184   }
12185   return AllocVal;
12186 }
12187 
12188 /// Return the alignment from an allocate directive if present.
12189 static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
12190   llvm::Optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
12191 
12192   if (!AllocateAlignment)
12193     return nullptr;
12194 
12195   return llvm::ConstantInt::get(CGM.SizeTy,
12196                                 AllocateAlignment.getValue().getQuantity());
12197 }
12198 
12199 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
12200                                                    const VarDecl *VD) {
12201   if (!VD)
12202     return Address::invalid();
12203   Address UntiedAddr = Address::invalid();
12204   Address UntiedRealAddr = Address::invalid();
12205   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12206   if (It != FunctionToUntiedTaskStackMap.end()) {
12207     const UntiedLocalVarsAddressesMap &UntiedData =
12208         UntiedLocalVarsStack[It->second];
12209     auto I = UntiedData.find(VD);
12210     if (I != UntiedData.end()) {
12211       UntiedAddr = I->second.first;
12212       UntiedRealAddr = I->second.second;
12213     }
12214   }
12215   const VarDecl *CVD = VD->getCanonicalDecl();
12216   if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
12217     // Use the default allocation.
12218     if (!isAllocatableDecl(VD))
12219       return UntiedAddr;
12220     llvm::Value *Size;
12221     CharUnits Align = CGM.getContext().getDeclAlign(CVD);
12222     if (CVD->getType()->isVariablyModifiedType()) {
12223       Size = CGF.getTypeSize(CVD->getType());
12224       // Align the size: ((size + align - 1) / align) * align
12225       Size = CGF.Builder.CreateNUWAdd(
12226           Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
12227       Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
12228       Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
12229     } else {
12230       CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
12231       Size = CGM.getSize(Sz.alignTo(Align));
12232     }
12233     llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
12234     const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
12235     const Expr *Allocator = AA->getAllocator();
12236     llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
12237     llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
12238     SmallVector<llvm::Value *, 4> Args;
12239     Args.push_back(ThreadID);
12240     if (Alignment)
12241       Args.push_back(Alignment);
12242     Args.push_back(Size);
12243     Args.push_back(AllocVal);
12244     llvm::omp::RuntimeFunction FnID =
12245         Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
12246     llvm::Value *Addr = CGF.EmitRuntimeCall(
12247         OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
12248         getName({CVD->getName(), ".void.addr"}));
12249     llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12250         CGM.getModule(), OMPRTL___kmpc_free);
12251     QualType Ty = CGM.getContext().getPointerType(CVD->getType());
12252     Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12253         Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
12254     if (UntiedAddr.isValid())
12255       CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
12256 
12257     // Cleanup action for allocate support.
12258     class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
12259       llvm::FunctionCallee RTLFn;
12260       SourceLocation::UIntTy LocEncoding;
12261       Address Addr;
12262       const Expr *AllocExpr;
12263 
12264     public:
12265       OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
12266                            SourceLocation::UIntTy LocEncoding, Address Addr,
12267                            const Expr *AllocExpr)
12268           : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
12269             AllocExpr(AllocExpr) {}
12270       void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12271         if (!CGF.HaveInsertPoint())
12272           return;
12273         llvm::Value *Args[3];
12274         Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
12275             CGF, SourceLocation::getFromRawEncoding(LocEncoding));
12276         Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12277             Addr.getPointer(), CGF.VoidPtrTy);
12278         llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
12279         Args[2] = AllocVal;
12280         CGF.EmitRuntimeCall(RTLFn, Args);
12281       }
12282     };
12283     Address VDAddr =
12284         UntiedRealAddr.isValid()
12285             ? UntiedRealAddr
12286             : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
12287     CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
12288         NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
12289         VDAddr, Allocator);
12290     if (UntiedRealAddr.isValid())
12291       if (auto *Region =
12292               dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
12293         Region->emitUntiedSwitch(CGF);
12294     return VDAddr;
12295   }
12296   return UntiedAddr;
12297 }
12298 
12299 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12300                                              const VarDecl *VD) const {
12301   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12302   if (It == FunctionToUntiedTaskStackMap.end())
12303     return false;
12304   return UntiedLocalVarsStack[It->second].count(VD) > 0;
12305 }
12306 
12307 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12308     CodeGenModule &CGM, const OMPLoopDirective &S)
12309     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12310   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12311   if (!NeedToPush)
12312     return;
12313   NontemporalDeclsSet &DS =
12314       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12315   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12316     for (const Stmt *Ref : C->private_refs()) {
12317       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12318       const ValueDecl *VD;
12319       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12320         VD = DRE->getDecl();
12321       } else {
12322         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12323         assert((ME->isImplicitCXXThis() ||
12324                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12325                "Expected member of current class.");
12326         VD = ME->getMemberDecl();
12327       }
12328       DS.insert(VD);
12329     }
12330   }
12331 }
12332 
12333 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12334   if (!NeedToPush)
12335     return;
12336   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12337 }
12338 
12339 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12340     CodeGenFunction &CGF,
12341     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12342                           std::pair<Address, Address>> &LocalVars)
12343     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12344   if (!NeedToPush)
12345     return;
12346   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12347       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12348   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12349 }
12350 
12351 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12352   if (!NeedToPush)
12353     return;
12354   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12355 }
12356 
12357 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12358   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12359 
12360   return llvm::any_of(
12361       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12362       [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
12363 }
12364 
12365 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12366     const OMPExecutableDirective &S,
12367     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12368     const {
12369   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12370   // Vars in target/task regions must be excluded completely.
12371   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12372       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12373     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12374     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12375     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12376     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12377       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12378         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12379     }
12380   }
12381   // Exclude vars in private clauses.
12382   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12383     for (const Expr *Ref : C->varlists()) {
12384       if (!Ref->getType()->isScalarType())
12385         continue;
12386       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12387       if (!DRE)
12388         continue;
12389       NeedToCheckForLPCs.insert(DRE->getDecl());
12390     }
12391   }
12392   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12393     for (const Expr *Ref : C->varlists()) {
12394       if (!Ref->getType()->isScalarType())
12395         continue;
12396       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12397       if (!DRE)
12398         continue;
12399       NeedToCheckForLPCs.insert(DRE->getDecl());
12400     }
12401   }
12402   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12403     for (const Expr *Ref : C->varlists()) {
12404       if (!Ref->getType()->isScalarType())
12405         continue;
12406       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12407       if (!DRE)
12408         continue;
12409       NeedToCheckForLPCs.insert(DRE->getDecl());
12410     }
12411   }
12412   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12413     for (const Expr *Ref : C->varlists()) {
12414       if (!Ref->getType()->isScalarType())
12415         continue;
12416       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12417       if (!DRE)
12418         continue;
12419       NeedToCheckForLPCs.insert(DRE->getDecl());
12420     }
12421   }
12422   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12423     for (const Expr *Ref : C->varlists()) {
12424       if (!Ref->getType()->isScalarType())
12425         continue;
12426       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12427       if (!DRE)
12428         continue;
12429       NeedToCheckForLPCs.insert(DRE->getDecl());
12430     }
12431   }
12432   for (const Decl *VD : NeedToCheckForLPCs) {
12433     for (const LastprivateConditionalData &Data :
12434          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12435       if (Data.DeclToUniqueName.count(VD) > 0) {
12436         if (!Data.Disabled)
12437           NeedToAddForLPCsAsDisabled.insert(VD);
12438         break;
12439       }
12440     }
12441   }
12442 }
12443 
12444 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12445     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12446     : CGM(CGF.CGM),
12447       Action((CGM.getLangOpts().OpenMP >= 50 &&
12448               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12449                            [](const OMPLastprivateClause *C) {
12450                              return C->getKind() ==
12451                                     OMPC_LASTPRIVATE_conditional;
12452                            }))
12453                  ? ActionToDo::PushAsLastprivateConditional
12454                  : ActionToDo::DoNotPush) {
12455   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12456   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12457     return;
12458   assert(Action == ActionToDo::PushAsLastprivateConditional &&
12459          "Expected a push action.");
12460   LastprivateConditionalData &Data =
12461       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12462   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12463     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12464       continue;
12465 
12466     for (const Expr *Ref : C->varlists()) {
12467       Data.DeclToUniqueName.insert(std::make_pair(
12468           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12469           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12470     }
12471   }
12472   Data.IVLVal = IVLVal;
12473   Data.Fn = CGF.CurFn;
12474 }
12475 
12476 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12477     CodeGenFunction &CGF, const OMPExecutableDirective &S)
12478     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12479   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12480   if (CGM.getLangOpts().OpenMP < 50)
12481     return;
12482   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12483   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12484   if (!NeedToAddForLPCsAsDisabled.empty()) {
12485     Action = ActionToDo::DisableLastprivateConditional;
12486     LastprivateConditionalData &Data =
12487         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12488     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12489       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12490     Data.Fn = CGF.CurFn;
12491     Data.Disabled = true;
12492   }
12493 }
12494 
12495 CGOpenMPRuntime::LastprivateConditionalRAII
12496 CGOpenMPRuntime::LastprivateConditionalRAII::disable(
12497     CodeGenFunction &CGF, const OMPExecutableDirective &S) {
12498   return LastprivateConditionalRAII(CGF, S);
12499 }
12500 
12501 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12502   if (CGM.getLangOpts().OpenMP < 50)
12503     return;
12504   if (Action == ActionToDo::DisableLastprivateConditional) {
12505     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12506            "Expected list of disabled private vars.");
12507     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12508   }
12509   if (Action == ActionToDo::PushAsLastprivateConditional) {
12510     assert(
12511         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12512         "Expected list of lastprivate conditional vars.");
12513     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12514   }
12515 }
12516 
12517 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12518                                                         const VarDecl *VD) {
12519   ASTContext &C = CGM.getContext();
12520   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
12521   if (I == LastprivateConditionalToTypes.end())
12522     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12523   QualType NewType;
12524   const FieldDecl *VDField;
12525   const FieldDecl *FiredField;
12526   LValue BaseLVal;
12527   auto VI = I->getSecond().find(VD);
12528   if (VI == I->getSecond().end()) {
12529     RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
12530     RD->startDefinition();
12531     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12532     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12533     RD->completeDefinition();
12534     NewType = C.getRecordType(RD);
12535     Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12536     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12537     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12538   } else {
12539     NewType = std::get<0>(VI->getSecond());
12540     VDField = std::get<1>(VI->getSecond());
12541     FiredField = std::get<2>(VI->getSecond());
12542     BaseLVal = std::get<3>(VI->getSecond());
12543   }
12544   LValue FiredLVal =
12545       CGF.EmitLValueForField(BaseLVal, FiredField);
12546   CGF.EmitStoreOfScalar(
12547       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12548       FiredLVal);
12549   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
12550 }
12551 
12552 namespace {
12553 /// Checks if the lastprivate conditional variable is referenced in LHS.
12554 class LastprivateConditionalRefChecker final
12555     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12556   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12557   const Expr *FoundE = nullptr;
12558   const Decl *FoundD = nullptr;
12559   StringRef UniqueDeclName;
12560   LValue IVLVal;
12561   llvm::Function *FoundFn = nullptr;
12562   SourceLocation Loc;
12563 
12564 public:
12565   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12566     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12567          llvm::reverse(LPM)) {
12568       auto It = D.DeclToUniqueName.find(E->getDecl());
12569       if (It == D.DeclToUniqueName.end())
12570         continue;
12571       if (D.Disabled)
12572         return false;
12573       FoundE = E;
12574       FoundD = E->getDecl()->getCanonicalDecl();
12575       UniqueDeclName = It->second;
12576       IVLVal = D.IVLVal;
12577       FoundFn = D.Fn;
12578       break;
12579     }
12580     return FoundE == E;
12581   }
12582   bool VisitMemberExpr(const MemberExpr *E) {
12583     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12584       return false;
12585     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12586          llvm::reverse(LPM)) {
12587       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12588       if (It == D.DeclToUniqueName.end())
12589         continue;
12590       if (D.Disabled)
12591         return false;
12592       FoundE = E;
12593       FoundD = E->getMemberDecl()->getCanonicalDecl();
12594       UniqueDeclName = It->second;
12595       IVLVal = D.IVLVal;
12596       FoundFn = D.Fn;
12597       break;
12598     }
12599     return FoundE == E;
12600   }
12601   bool VisitStmt(const Stmt *S) {
12602     for (const Stmt *Child : S->children()) {
12603       if (!Child)
12604         continue;
12605       if (const auto *E = dyn_cast<Expr>(Child))
12606         if (!E->isGLValue())
12607           continue;
12608       if (Visit(Child))
12609         return true;
12610     }
12611     return false;
12612   }
12613   explicit LastprivateConditionalRefChecker(
12614       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12615       : LPM(LPM) {}
12616   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12617   getFoundData() const {
12618     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12619   }
12620 };
12621 } // namespace
12622 
12623 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
12624                                                        LValue IVLVal,
12625                                                        StringRef UniqueDeclName,
12626                                                        LValue LVal,
12627                                                        SourceLocation Loc) {
12628   // Last updated loop counter for the lastprivate conditional var.
12629   // int<xx> last_iv = 0;
12630   llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
12631   llvm::Constant *LastIV =
12632       getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
12633   cast<llvm::GlobalVariable>(LastIV)->setAlignment(
12634       IVLVal.getAlignment().getAsAlign());
12635   LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
12636 
12637   // Last value of the lastprivate conditional.
12638   // decltype(priv_a) last_a;
12639   llvm::GlobalVariable *Last = getOrCreateInternalVariable(
12640       CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
12641   Last->setAlignment(LVal.getAlignment().getAsAlign());
12642   LValue LastLVal = CGF.MakeAddrLValue(
12643       Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());
12644 
12645   // Global loop counter. Required to handle inner parallel-for regions.
12646   // iv
12647   llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
12648 
12649   // #pragma omp critical(a)
12650   // if (last_iv <= iv) {
12651   //   last_iv = iv;
12652   //   last_a = priv_a;
12653   // }
12654   auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
12655                     Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
12656     Action.Enter(CGF);
12657     llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
12658     // (last_iv <= iv) ? Check if the variable is updated and store new
12659     // value in global var.
12660     llvm::Value *CmpRes;
12661     if (IVLVal.getType()->isSignedIntegerType()) {
12662       CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
12663     } else {
12664       assert(IVLVal.getType()->isUnsignedIntegerType() &&
12665              "Loop iteration variable must be integer.");
12666       CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
12667     }
12668     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
12669     llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
12670     CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
12671     // {
12672     CGF.EmitBlock(ThenBB);
12673 
12674     //   last_iv = iv;
12675     CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
12676 
12677     //   last_a = priv_a;
12678     switch (CGF.getEvaluationKind(LVal.getType())) {
12679     case TEK_Scalar: {
12680       llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
12681       CGF.EmitStoreOfScalar(PrivVal, LastLVal);
12682       break;
12683     }
12684     case TEK_Complex: {
12685       CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
12686       CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
12687       break;
12688     }
12689     case TEK_Aggregate:
12690       llvm_unreachable(
12691           "Aggregates are not supported in lastprivate conditional.");
12692     }
12693     // }
12694     CGF.EmitBranch(ExitBB);
12695     // There is no need to emit line number for unconditional branch.
12696     (void)ApplyDebugLocation::CreateEmpty(CGF);
12697     CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
12698   };
12699 
12700   if (CGM.getLangOpts().OpenMPSimd) {
12701     // Do not emit as a critical region as no parallel region could be emitted.
12702     RegionCodeGenTy ThenRCG(CodeGen);
12703     ThenRCG(CGF);
12704   } else {
12705     emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
12706   }
12707 }
12708 
12709 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12710                                                          const Expr *LHS) {
12711   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12712     return;
12713   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12714   if (!Checker.Visit(LHS))
12715     return;
12716   const Expr *FoundE;
12717   const Decl *FoundD;
12718   StringRef UniqueDeclName;
12719   LValue IVLVal;
12720   llvm::Function *FoundFn;
12721   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12722       Checker.getFoundData();
12723   if (FoundFn != CGF.CurFn) {
12724     // Special codegen for inner parallel regions.
12725     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12726     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12727     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12728            "Lastprivate conditional is not found in outer region.");
12729     QualType StructTy = std::get<0>(It->getSecond());
12730     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12731     LValue PrivLVal = CGF.EmitLValue(FoundE);
12732     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12733         PrivLVal.getAddress(CGF),
12734         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
12735         CGF.ConvertTypeForMem(StructTy));
12736     LValue BaseLVal =
12737         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12738     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12739     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12740                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12741                         FiredLVal, llvm::AtomicOrdering::Unordered,
12742                         /*IsVolatile=*/true, /*isInit=*/false);
12743     return;
12744   }
12745 
12746   // Private address of the lastprivate conditional in the current context.
12747   // priv_a
12748   LValue LVal = CGF.EmitLValue(FoundE);
12749   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12750                                    FoundE->getExprLoc());
12751 }
12752 
12753 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12754     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12755     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12756   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12757     return;
12758   auto Range = llvm::reverse(LastprivateConditionalStack);
12759   auto It = llvm::find_if(
12760       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12761   if (It == Range.end() || It->Fn != CGF.CurFn)
12762     return;
12763   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12764   assert(LPCI != LastprivateConditionalToTypes.end() &&
12765          "Lastprivates must be registered already.");
12766   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12767   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12768   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12769   for (const auto &Pair : It->DeclToUniqueName) {
12770     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12771     if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
12772       continue;
12773     auto I = LPCI->getSecond().find(Pair.first);
12774     assert(I != LPCI->getSecond().end() &&
12775            "Lastprivate must be rehistered already.");
12776     // bool Cmp = priv_a.Fired != 0;
12777     LValue BaseLVal = std::get<3>(I->getSecond());
12778     LValue FiredLVal =
12779         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12780     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12781     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12782     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12783     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12784     // if (Cmp) {
12785     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12786     CGF.EmitBlock(ThenBB);
12787     Address Addr = CGF.GetAddrOfLocalVar(VD);
12788     LValue LVal;
12789     if (VD->getType()->isReferenceType())
12790       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12791                                            AlignmentSource::Decl);
12792     else
12793       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12794                                 AlignmentSource::Decl);
12795     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12796                                      D.getBeginLoc());
12797     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12798     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12799     // }
12800   }
12801 }
12802 
12803 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12804     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12805     SourceLocation Loc) {
12806   if (CGF.getLangOpts().OpenMP < 50)
12807     return;
12808   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12809   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12810          "Unknown lastprivate conditional variable.");
12811   StringRef UniqueName = It->second;
12812   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12813   // The variable was not updated in the region - exit.
12814   if (!GV)
12815     return;
12816   LValue LPLVal = CGF.MakeAddrLValue(
12817       Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
12818       PrivLVal.getType().getNonReferenceType());
12819   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12820   CGF.EmitStoreOfScalar(Res, PrivLVal);
12821 }
12822 
12823 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
12824     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12825     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
12826   llvm_unreachable("Not supported in SIMD-only mode");
12827 }
12828 
12829 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
12830     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12831     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
12832   llvm_unreachable("Not supported in SIMD-only mode");
12833 }
12834 
12835 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
12836     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12837     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
12838     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
12839     bool Tied, unsigned &NumberOfParts) {
12840   llvm_unreachable("Not supported in SIMD-only mode");
12841 }
12842 
12843 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
12844                                            SourceLocation Loc,
12845                                            llvm::Function *OutlinedFn,
12846                                            ArrayRef<llvm::Value *> CapturedVars,
12847                                            const Expr *IfCond,
12848                                            llvm::Value *NumThreads) {
12849   llvm_unreachable("Not supported in SIMD-only mode");
12850 }
12851 
12852 void CGOpenMPSIMDRuntime::emitCriticalRegion(
12853     CodeGenFunction &CGF, StringRef CriticalName,
12854     const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
12855     const Expr *Hint) {
12856   llvm_unreachable("Not supported in SIMD-only mode");
12857 }
12858 
12859 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
12860                                            const RegionCodeGenTy &MasterOpGen,
12861                                            SourceLocation Loc) {
12862   llvm_unreachable("Not supported in SIMD-only mode");
12863 }
12864 
12865 void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
12866                                            const RegionCodeGenTy &MasterOpGen,
12867                                            SourceLocation Loc,
12868                                            const Expr *Filter) {
12869   llvm_unreachable("Not supported in SIMD-only mode");
12870 }
12871 
12872 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
12873                                             SourceLocation Loc) {
12874   llvm_unreachable("Not supported in SIMD-only mode");
12875 }
12876 
12877 void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
12878     CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
12879     SourceLocation Loc) {
12880   llvm_unreachable("Not supported in SIMD-only mode");
12881 }
12882 
12883 void CGOpenMPSIMDRuntime::emitSingleRegion(
12884     CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
12885     SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
12886     ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
12887     ArrayRef<const Expr *> AssignmentOps) {
12888   llvm_unreachable("Not supported in SIMD-only mode");
12889 }
12890 
12891 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
12892                                             const RegionCodeGenTy &OrderedOpGen,
12893                                             SourceLocation Loc,
12894                                             bool IsThreads) {
12895   llvm_unreachable("Not supported in SIMD-only mode");
12896 }
12897 
12898 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
12899                                           SourceLocation Loc,
12900                                           OpenMPDirectiveKind Kind,
12901                                           bool EmitChecks,
12902                                           bool ForceSimpleCall) {
12903   llvm_unreachable("Not supported in SIMD-only mode");
12904 }
12905 
12906 void CGOpenMPSIMDRuntime::emitForDispatchInit(
12907     CodeGenFunction &CGF, SourceLocation Loc,
12908     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
12909     bool Ordered, const DispatchRTInput &DispatchValues) {
12910   llvm_unreachable("Not supported in SIMD-only mode");
12911 }
12912 
12913 void CGOpenMPSIMDRuntime::emitForStaticInit(
12914     CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
12915     const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
12916   llvm_unreachable("Not supported in SIMD-only mode");
12917 }
12918 
12919 void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
12920     CodeGenFunction &CGF, SourceLocation Loc,
12921     OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
12922   llvm_unreachable("Not supported in SIMD-only mode");
12923 }
12924 
12925 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
12926                                                      SourceLocation Loc,
12927                                                      unsigned IVSize,
12928                                                      bool IVSigned) {
12929   llvm_unreachable("Not supported in SIMD-only mode");
12930 }
12931 
12932 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
12933                                               SourceLocation Loc,
12934                                               OpenMPDirectiveKind DKind) {
12935   llvm_unreachable("Not supported in SIMD-only mode");
12936 }
12937 
12938 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
12939                                               SourceLocation Loc,
12940                                               unsigned IVSize, bool IVSigned,
12941                                               Address IL, Address LB,
12942                                               Address UB, Address ST) {
12943   llvm_unreachable("Not supported in SIMD-only mode");
12944 }
12945 
12946 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
12947                                                llvm::Value *NumThreads,
12948                                                SourceLocation Loc) {
12949   llvm_unreachable("Not supported in SIMD-only mode");
12950 }
12951 
12952 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
12953                                              ProcBindKind ProcBind,
12954                                              SourceLocation Loc) {
12955   llvm_unreachable("Not supported in SIMD-only mode");
12956 }
12957 
12958 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
12959                                                     const VarDecl *VD,
12960                                                     Address VDAddr,
12961                                                     SourceLocation Loc) {
12962   llvm_unreachable("Not supported in SIMD-only mode");
12963 }
12964 
12965 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
12966     const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
12967     CodeGenFunction *CGF) {
12968   llvm_unreachable("Not supported in SIMD-only mode");
12969 }
12970 
12971 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
12972     CodeGenFunction &CGF, QualType VarType, StringRef Name) {
12973   llvm_unreachable("Not supported in SIMD-only mode");
12974 }
12975 
12976 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
12977                                     ArrayRef<const Expr *> Vars,
12978                                     SourceLocation Loc,
12979                                     llvm::AtomicOrdering AO) {
12980   llvm_unreachable("Not supported in SIMD-only mode");
12981 }
12982 
12983 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
12984                                        const OMPExecutableDirective &D,
12985                                        llvm::Function *TaskFunction,
12986                                        QualType SharedsTy, Address Shareds,
12987                                        const Expr *IfCond,
12988                                        const OMPTaskDataTy &Data) {
12989   llvm_unreachable("Not supported in SIMD-only mode");
12990 }
12991 
12992 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
12993     CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
12994     llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
12995     const Expr *IfCond, const OMPTaskDataTy &Data) {
12996   llvm_unreachable("Not supported in SIMD-only mode");
12997 }
12998 
12999 void CGOpenMPSIMDRuntime::emitReduction(
13000     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
13001     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
13002     ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
13003   assert(Options.SimpleReduction && "Only simple reduction is expected.");
13004   CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
13005                                  ReductionOps, Options);
13006 }
13007 
13008 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
13009     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
13010     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
13011   llvm_unreachable("Not supported in SIMD-only mode");
13012 }
13013 
13014 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
13015                                                 SourceLocation Loc,
13016                                                 bool IsWorksharingReduction) {
13017   llvm_unreachable("Not supported in SIMD-only mode");
13018 }
13019 
13020 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
13021                                                   SourceLocation Loc,
13022                                                   ReductionCodeGen &RCG,
13023                                                   unsigned N) {
13024   llvm_unreachable("Not supported in SIMD-only mode");
13025 }
13026 
13027 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
13028                                                   SourceLocation Loc,
13029                                                   llvm::Value *ReductionsPtr,
13030                                                   LValue SharedLVal) {
13031   llvm_unreachable("Not supported in SIMD-only mode");
13032 }
13033 
13034 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
13035                                            SourceLocation Loc,
13036                                            const OMPTaskDataTy &Data) {
13037   llvm_unreachable("Not supported in SIMD-only mode");
13038 }
13039 
13040 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
13041     CodeGenFunction &CGF, SourceLocation Loc,
13042     OpenMPDirectiveKind CancelRegion) {
13043   llvm_unreachable("Not supported in SIMD-only mode");
13044 }
13045 
13046 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
13047                                          SourceLocation Loc, const Expr *IfCond,
13048                                          OpenMPDirectiveKind CancelRegion) {
13049   llvm_unreachable("Not supported in SIMD-only mode");
13050 }
13051 
13052 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
13053     const OMPExecutableDirective &D, StringRef ParentName,
13054     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
13055     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
13056   llvm_unreachable("Not supported in SIMD-only mode");
13057 }
13058 
13059 void CGOpenMPSIMDRuntime::emitTargetCall(
13060     CodeGenFunction &CGF, const OMPExecutableDirective &D,
13061     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
13062     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
13063     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
13064                                      const OMPLoopDirective &D)>
13065         SizeEmitter) {
13066   llvm_unreachable("Not supported in SIMD-only mode");
13067 }
13068 
13069 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
13070   llvm_unreachable("Not supported in SIMD-only mode");
13071 }
13072 
13073 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
13074   llvm_unreachable("Not supported in SIMD-only mode");
13075 }
13076 
13077 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
13078   return false;
13079 }
13080 
13081 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
13082                                         const OMPExecutableDirective &D,
13083                                         SourceLocation Loc,
13084                                         llvm::Function *OutlinedFn,
13085                                         ArrayRef<llvm::Value *> CapturedVars) {
13086   llvm_unreachable("Not supported in SIMD-only mode");
13087 }
13088 
13089 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
13090                                              const Expr *NumTeams,
13091                                              const Expr *ThreadLimit,
13092                                              SourceLocation Loc) {
13093   llvm_unreachable("Not supported in SIMD-only mode");
13094 }
13095 
13096 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
13097     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
13098     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
13099   llvm_unreachable("Not supported in SIMD-only mode");
13100 }
13101 
13102 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
13103     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
13104     const Expr *Device) {
13105   llvm_unreachable("Not supported in SIMD-only mode");
13106 }
13107 
13108 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
13109                                            const OMPLoopDirective &D,
13110                                            ArrayRef<Expr *> NumIterations) {
13111   llvm_unreachable("Not supported in SIMD-only mode");
13112 }
13113 
13114 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
13115                                               const OMPDependClause *C) {
13116   llvm_unreachable("Not supported in SIMD-only mode");
13117 }
13118 
13119 const VarDecl *
13120 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
13121                                         const VarDecl *NativeParam) const {
13122   llvm_unreachable("Not supported in SIMD-only mode");
13123 }
13124 
13125 Address
13126 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
13127                                          const VarDecl *NativeParam,
13128                                          const VarDecl *TargetParam) const {
13129   llvm_unreachable("Not supported in SIMD-only mode");
13130 }
13131