1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/APValue.h"
20 #include "clang/AST/Attr.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/AST/OpenMPClause.h"
23 #include "clang/AST/StmtOpenMP.h"
24 #include "clang/AST/StmtVisitor.h"
25 #include "clang/Basic/BitmaskEnum.h"
26 #include "clang/Basic/FileManager.h"
27 #include "clang/Basic/OpenMPKinds.h"
28 #include "clang/Basic/SourceManager.h"
29 #include "clang/CodeGen/ConstantInitBuilder.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/SetOperations.h"
32 #include "llvm/ADT/SmallBitVector.h"
33 #include "llvm/ADT/StringExtras.h"
34 #include "llvm/Bitcode/BitcodeReader.h"
35 #include "llvm/IR/Constants.h"
36 #include "llvm/IR/DerivedTypes.h"
37 #include "llvm/IR/GlobalValue.h"
38 #include "llvm/IR/InstrTypes.h"
39 #include "llvm/IR/Value.h"
40 #include "llvm/Support/AtomicOrdering.h"
41 #include "llvm/Support/Format.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include <cassert>
44 #include <numeric>
45 
46 using namespace clang;
47 using namespace CodeGen;
48 using namespace llvm::omp;
49 
50 namespace {
51 /// Base class for handling code generation inside OpenMP regions.
52 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
53 public:
54   /// Kinds of OpenMP regions used in codegen.
55   enum CGOpenMPRegionKind {
56     /// Region with outlined function for standalone 'parallel'
57     /// directive.
58     ParallelOutlinedRegion,
59     /// Region with outlined function for standalone 'task' directive.
60     TaskOutlinedRegion,
61     /// Region for constructs that do not require function outlining,
62     /// like 'for', 'sections', 'atomic' etc. directives.
63     InlinedRegion,
64     /// Region with outlined function for standalone 'target' directive.
65     TargetRegion,
66   };
67 
68   CGOpenMPRegionInfo(const CapturedStmt &CS,
69                      const CGOpenMPRegionKind RegionKind,
70                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
71                      bool HasCancel)
72       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
73         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
74 
75   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
76                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
77                      bool HasCancel)
78       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
79         Kind(Kind), HasCancel(HasCancel) {}
80 
81   /// Get a variable or parameter for storing global thread id
82   /// inside OpenMP construct.
83   virtual const VarDecl *getThreadIDVariable() const = 0;
84 
85   /// Emit the captured statement body.
86   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
87 
88   /// Get an LValue for the current ThreadID variable.
89   /// \return LValue for thread id variable. This LValue always has type int32*.
90   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
91 
92   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
93 
94   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
95 
96   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
97 
98   bool hasCancel() const { return HasCancel; }
99 
100   static bool classof(const CGCapturedStmtInfo *Info) {
101     return Info->getKind() == CR_OpenMP;
102   }
103 
104   ~CGOpenMPRegionInfo() override = default;
105 
106 protected:
107   CGOpenMPRegionKind RegionKind;
108   RegionCodeGenTy CodeGen;
109   OpenMPDirectiveKind Kind;
110   bool HasCancel;
111 };
112 
113 /// API for captured statement code generation in OpenMP constructs.
114 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
115 public:
116   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
117                              const RegionCodeGenTy &CodeGen,
118                              OpenMPDirectiveKind Kind, bool HasCancel,
119                              StringRef HelperName)
120       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
121                            HasCancel),
122         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
123     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
124   }
125 
126   /// Get a variable or parameter for storing global thread id
127   /// inside OpenMP construct.
128   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
129 
130   /// Get the name of the capture helper.
131   StringRef getHelperName() const override { return HelperName; }
132 
133   static bool classof(const CGCapturedStmtInfo *Info) {
134     return CGOpenMPRegionInfo::classof(Info) &&
135            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
136                ParallelOutlinedRegion;
137   }
138 
139 private:
140   /// A variable or parameter storing global thread id for OpenMP
141   /// constructs.
142   const VarDecl *ThreadIDVar;
143   StringRef HelperName;
144 };
145 
146 /// API for captured statement code generation in OpenMP constructs.
147 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
148 public:
149   class UntiedTaskActionTy final : public PrePostActionTy {
150     bool Untied;
151     const VarDecl *PartIDVar;
152     const RegionCodeGenTy UntiedCodeGen;
153     llvm::SwitchInst *UntiedSwitch = nullptr;
154 
155   public:
156     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
157                        const RegionCodeGenTy &UntiedCodeGen)
158         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
159     void Enter(CodeGenFunction &CGF) override {
160       if (Untied) {
161         // Emit task switching point.
162         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
163             CGF.GetAddrOfLocalVar(PartIDVar),
164             PartIDVar->getType()->castAs<PointerType>());
165         llvm::Value *Res =
166             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
167         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
168         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
169         CGF.EmitBlock(DoneBB);
170         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
171         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
172         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
173                               CGF.Builder.GetInsertBlock());
174         emitUntiedSwitch(CGF);
175       }
176     }
177     void emitUntiedSwitch(CodeGenFunction &CGF) const {
178       if (Untied) {
179         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
180             CGF.GetAddrOfLocalVar(PartIDVar),
181             PartIDVar->getType()->castAs<PointerType>());
182         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
183                               PartIdLVal);
184         UntiedCodeGen(CGF);
185         CodeGenFunction::JumpDest CurPoint =
186             CGF.getJumpDestInCurrentScope(".untied.next.");
187         CGF.EmitBranch(CGF.ReturnBlock.getBlock());
188         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
189         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
190                               CGF.Builder.GetInsertBlock());
191         CGF.EmitBranchThroughCleanup(CurPoint);
192         CGF.EmitBlock(CurPoint.getBlock());
193       }
194     }
195     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
196   };
197   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
198                                  const VarDecl *ThreadIDVar,
199                                  const RegionCodeGenTy &CodeGen,
200                                  OpenMPDirectiveKind Kind, bool HasCancel,
201                                  const UntiedTaskActionTy &Action)
202       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
203         ThreadIDVar(ThreadIDVar), Action(Action) {
204     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
205   }
206 
207   /// Get a variable or parameter for storing global thread id
208   /// inside OpenMP construct.
209   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
210 
211   /// Get an LValue for the current ThreadID variable.
212   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
213 
214   /// Get the name of the capture helper.
215   StringRef getHelperName() const override { return ".omp_outlined."; }
216 
217   void emitUntiedSwitch(CodeGenFunction &CGF) override {
218     Action.emitUntiedSwitch(CGF);
219   }
220 
221   static bool classof(const CGCapturedStmtInfo *Info) {
222     return CGOpenMPRegionInfo::classof(Info) &&
223            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
224                TaskOutlinedRegion;
225   }
226 
227 private:
228   /// A variable or parameter storing global thread id for OpenMP
229   /// constructs.
230   const VarDecl *ThreadIDVar;
231   /// Action for emitting code for untied tasks.
232   const UntiedTaskActionTy &Action;
233 };
234 
235 /// API for inlined captured statement code generation in OpenMP
236 /// constructs.
237 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
238 public:
239   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
240                             const RegionCodeGenTy &CodeGen,
241                             OpenMPDirectiveKind Kind, bool HasCancel)
242       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
243         OldCSI(OldCSI),
244         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
245 
246   // Retrieve the value of the context parameter.
247   llvm::Value *getContextValue() const override {
248     if (OuterRegionInfo)
249       return OuterRegionInfo->getContextValue();
250     llvm_unreachable("No context value for inlined OpenMP region");
251   }
252 
253   void setContextValue(llvm::Value *V) override {
254     if (OuterRegionInfo) {
255       OuterRegionInfo->setContextValue(V);
256       return;
257     }
258     llvm_unreachable("No context value for inlined OpenMP region");
259   }
260 
261   /// Lookup the captured field decl for a variable.
262   const FieldDecl *lookup(const VarDecl *VD) const override {
263     if (OuterRegionInfo)
264       return OuterRegionInfo->lookup(VD);
265     // If there is no outer outlined region,no need to lookup in a list of
266     // captured variables, we can use the original one.
267     return nullptr;
268   }
269 
270   FieldDecl *getThisFieldDecl() const override {
271     if (OuterRegionInfo)
272       return OuterRegionInfo->getThisFieldDecl();
273     return nullptr;
274   }
275 
276   /// Get a variable or parameter for storing global thread id
277   /// inside OpenMP construct.
278   const VarDecl *getThreadIDVariable() const override {
279     if (OuterRegionInfo)
280       return OuterRegionInfo->getThreadIDVariable();
281     return nullptr;
282   }
283 
284   /// Get an LValue for the current ThreadID variable.
285   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
286     if (OuterRegionInfo)
287       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
288     llvm_unreachable("No LValue for inlined OpenMP construct");
289   }
290 
291   /// Get the name of the capture helper.
292   StringRef getHelperName() const override {
293     if (auto *OuterRegionInfo = getOldCSI())
294       return OuterRegionInfo->getHelperName();
295     llvm_unreachable("No helper name for inlined OpenMP construct");
296   }
297 
298   void emitUntiedSwitch(CodeGenFunction &CGF) override {
299     if (OuterRegionInfo)
300       OuterRegionInfo->emitUntiedSwitch(CGF);
301   }
302 
303   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
304 
305   static bool classof(const CGCapturedStmtInfo *Info) {
306     return CGOpenMPRegionInfo::classof(Info) &&
307            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
308   }
309 
310   ~CGOpenMPInlinedRegionInfo() override = default;
311 
312 private:
313   /// CodeGen info about outer OpenMP region.
314   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
315   CGOpenMPRegionInfo *OuterRegionInfo;
316 };
317 
318 /// API for captured statement code generation in OpenMP target
319 /// constructs. For this captures, implicit parameters are used instead of the
320 /// captured fields. The name of the target region has to be unique in a given
321 /// application so it is provided by the client, because only the client has
322 /// the information to generate that.
323 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
324 public:
325   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
326                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
327       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
328                            /*HasCancel=*/false),
329         HelperName(HelperName) {}
330 
331   /// This is unused for target regions because each starts executing
332   /// with a single thread.
333   const VarDecl *getThreadIDVariable() const override { return nullptr; }
334 
335   /// Get the name of the capture helper.
336   StringRef getHelperName() const override { return HelperName; }
337 
338   static bool classof(const CGCapturedStmtInfo *Info) {
339     return CGOpenMPRegionInfo::classof(Info) &&
340            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
341   }
342 
343 private:
344   StringRef HelperName;
345 };
346 
347 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
348   llvm_unreachable("No codegen for expressions");
349 }
350 /// API for generation of expressions captured in a innermost OpenMP
351 /// region.
352 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
353 public:
354   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
355       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
356                                   OMPD_unknown,
357                                   /*HasCancel=*/false),
358         PrivScope(CGF) {
359     // Make sure the globals captured in the provided statement are local by
360     // using the privatization logic. We assume the same variable is not
361     // captured more than once.
362     for (const auto &C : CS.captures()) {
363       if (!C.capturesVariable() && !C.capturesVariableByCopy())
364         continue;
365 
366       const VarDecl *VD = C.getCapturedVar();
367       if (VD->isLocalVarDeclOrParm())
368         continue;
369 
370       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
371                       /*RefersToEnclosingVariableOrCapture=*/false,
372                       VD->getType().getNonReferenceType(), VK_LValue,
373                       C.getLocation());
374       PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
375     }
376     (void)PrivScope.Privatize();
377   }
378 
379   /// Lookup the captured field decl for a variable.
380   const FieldDecl *lookup(const VarDecl *VD) const override {
381     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
382       return FD;
383     return nullptr;
384   }
385 
386   /// Emit the captured statement body.
387   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
388     llvm_unreachable("No body for expressions");
389   }
390 
391   /// Get a variable or parameter for storing global thread id
392   /// inside OpenMP construct.
393   const VarDecl *getThreadIDVariable() const override {
394     llvm_unreachable("No thread id for expressions");
395   }
396 
397   /// Get the name of the capture helper.
398   StringRef getHelperName() const override {
399     llvm_unreachable("No helper name for expressions");
400   }
401 
402   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
403 
404 private:
405   /// Private scope to capture global variables.
406   CodeGenFunction::OMPPrivateScope PrivScope;
407 };
408 
409 /// RAII for emitting code of OpenMP constructs.
410 class InlinedOpenMPRegionRAII {
411   CodeGenFunction &CGF;
412   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
413   FieldDecl *LambdaThisCaptureField = nullptr;
414   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
415   bool NoInheritance = false;
416 
417 public:
418   /// Constructs region for combined constructs.
419   /// \param CodeGen Code generation sequence for combined directives. Includes
420   /// a list of functions used for code generation of implicitly inlined
421   /// regions.
422   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
423                           OpenMPDirectiveKind Kind, bool HasCancel,
424                           bool NoInheritance = true)
425       : CGF(CGF), NoInheritance(NoInheritance) {
426     // Start emission for the construct.
427     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
428         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
429     if (NoInheritance) {
430       std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
431       LambdaThisCaptureField = CGF.LambdaThisCaptureField;
432       CGF.LambdaThisCaptureField = nullptr;
433       BlockInfo = CGF.BlockInfo;
434       CGF.BlockInfo = nullptr;
435     }
436   }
437 
438   ~InlinedOpenMPRegionRAII() {
439     // Restore original CapturedStmtInfo only if we're done with code emission.
440     auto *OldCSI =
441         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
442     delete CGF.CapturedStmtInfo;
443     CGF.CapturedStmtInfo = OldCSI;
444     if (NoInheritance) {
445       std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
446       CGF.LambdaThisCaptureField = LambdaThisCaptureField;
447       CGF.BlockInfo = BlockInfo;
448     }
449   }
450 };
451 
452 /// Values for bit flags used in the ident_t to describe the fields.
453 /// All enumeric elements are named and described in accordance with the code
454 /// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
455 enum OpenMPLocationFlags : unsigned {
456   /// Use trampoline for internal microtask.
457   OMP_IDENT_IMD = 0x01,
458   /// Use c-style ident structure.
459   OMP_IDENT_KMPC = 0x02,
460   /// Atomic reduction option for kmpc_reduce.
461   OMP_ATOMIC_REDUCE = 0x10,
462   /// Explicit 'barrier' directive.
463   OMP_IDENT_BARRIER_EXPL = 0x20,
464   /// Implicit barrier in code.
465   OMP_IDENT_BARRIER_IMPL = 0x40,
466   /// Implicit barrier in 'for' directive.
467   OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
468   /// Implicit barrier in 'sections' directive.
469   OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
470   /// Implicit barrier in 'single' directive.
471   OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
472   /// Call of __kmp_for_static_init for static loop.
473   OMP_IDENT_WORK_LOOP = 0x200,
474   /// Call of __kmp_for_static_init for sections.
475   OMP_IDENT_WORK_SECTIONS = 0x400,
476   /// Call of __kmp_for_static_init for distribute.
477   OMP_IDENT_WORK_DISTRIBUTE = 0x800,
478   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
479 };
480 
481 namespace {
482 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
483 /// Values for bit flags for marking which requires clauses have been used.
484 enum OpenMPOffloadingRequiresDirFlags : int64_t {
485   /// flag undefined.
486   OMP_REQ_UNDEFINED               = 0x000,
487   /// no requires clause present.
488   OMP_REQ_NONE                    = 0x001,
489   /// reverse_offload clause.
490   OMP_REQ_REVERSE_OFFLOAD         = 0x002,
491   /// unified_address clause.
492   OMP_REQ_UNIFIED_ADDRESS         = 0x004,
493   /// unified_shared_memory clause.
494   OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
495   /// dynamic_allocators clause.
496   OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
497   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
498 };
499 
500 enum OpenMPOffloadingReservedDeviceIDs {
501   /// Device ID if the device was not defined, runtime should get it
502   /// from environment variables in the spec.
503   OMP_DEVICEID_UNDEF = -1,
504 };
505 } // anonymous namespace
506 
/// Indices of the fields of the ident structure that describes a source
/// location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// Might be used in Fortran.
  IdentField_Reserved_1 = 0,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags = 1,
  /// Not really used in Fortran any more.
  IdentField_Reserved_2 = 2,
  /// Source[4] in Fortran, do not use for C++.
  IdentField_Reserved_3 = 3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource = 4
};
547 
/// Schedule kinds for 'omp for' loops. The enumerators mirror the enum
/// sched_type declared in kmp.h.
enum OpenMPSchedType {
  // Default (unordered) schedules.
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// Static with chunk adjustment (e.g., simd).
  OMP_sch_static_balanced_chunked = 45,

  // 'ordered' schedules.
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,

  /// Schedule used by default; an alias of the plain static schedule.
  OMP_sch_default = OMP_sch_static,

  // dist_schedule types.
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,

  // Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = 1 << 29,
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = 1 << 30,
};
579 
580 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
581 /// region.
582 class CleanupTy final : public EHScopeStack::Cleanup {
583   PrePostActionTy *Action;
584 
585 public:
586   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
587   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
588     if (!CGF.HaveInsertPoint())
589       return;
590     Action->Exit(CGF);
591   }
592 };
593 
594 } // anonymous namespace
595 
596 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
597   CodeGenFunction::RunCleanupsScope Scope(CGF);
598   if (PrePostAction) {
599     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
600     Callback(CodeGen, CGF, *PrePostAction);
601   } else {
602     PrePostActionTy Action;
603     Callback(CodeGen, CGF, Action);
604   }
605 }
606 
607 /// Check if the combiner is a call to UDR combiner and if it is so return the
608 /// UDR decl used for reduction.
609 static const OMPDeclareReductionDecl *
610 getReductionInit(const Expr *ReductionOp) {
611   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
612     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
613       if (const auto *DRE =
614               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
615         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
616           return DRD;
617   return nullptr;
618 }
619 
620 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
621                                              const OMPDeclareReductionDecl *DRD,
622                                              const Expr *InitOp,
623                                              Address Private, Address Original,
624                                              QualType Ty) {
625   if (DRD->getInitializer()) {
626     std::pair<llvm::Function *, llvm::Function *> Reduction =
627         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
628     const auto *CE = cast<CallExpr>(InitOp);
629     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
630     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
631     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
632     const auto *LHSDRE =
633         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
634     const auto *RHSDRE =
635         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
636     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
637     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
638     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
639     (void)PrivateScope.Privatize();
640     RValue Func = RValue::get(Reduction.second);
641     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
642     CGF.EmitIgnoredExpr(InitOp);
643   } else {
644     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
645     std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
646     auto *GV = new llvm::GlobalVariable(
647         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
648         llvm::GlobalValue::PrivateLinkage, Init, Name);
649     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
650     RValue InitRVal;
651     switch (CGF.getEvaluationKind(Ty)) {
652     case TEK_Scalar:
653       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
654       break;
655     case TEK_Complex:
656       InitRVal =
657           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
658       break;
659     case TEK_Aggregate: {
660       OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
661       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
662       CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
663                            /*IsInitializer=*/false);
664       return;
665     }
666     }
667     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
668     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
669     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
670                          /*IsInitializer=*/false);
671   }
672 }
673 
674 /// Emit initialization of arrays of complex types.
675 /// \param DestAddr Address of the array.
676 /// \param Type Type of array.
677 /// \param Init Initial expression of array.
678 /// \param SrcAddr Address of the original array.
679 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
680                                  QualType Type, bool EmitDeclareReductionInit,
681                                  const Expr *Init,
682                                  const OMPDeclareReductionDecl *DRD,
683                                  Address SrcAddr = Address::invalid()) {
684   // Perform element-by-element initialization.
685   QualType ElementTy;
686 
687   // Drill down to the base element type on both arrays.
688   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
689   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
690   if (DRD)
691     SrcAddr =
692         CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
693 
694   llvm::Value *SrcBegin = nullptr;
695   if (DRD)
696     SrcBegin = SrcAddr.getPointer();
697   llvm::Value *DestBegin = DestAddr.getPointer();
698   // Cast from pointer to array type to pointer to single element.
699   llvm::Value *DestEnd =
700       CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
701   // The basic structure here is a while-do loop.
702   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
703   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
704   llvm::Value *IsEmpty =
705       CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
706   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
707 
708   // Enter the loop body, making that address the current address.
709   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
710   CGF.EmitBlock(BodyBB);
711 
712   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
713 
714   llvm::PHINode *SrcElementPHI = nullptr;
715   Address SrcElementCurrent = Address::invalid();
716   if (DRD) {
717     SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
718                                           "omp.arraycpy.srcElementPast");
719     SrcElementPHI->addIncoming(SrcBegin, EntryBB);
720     SrcElementCurrent =
721         Address(SrcElementPHI, SrcAddr.getElementType(),
722                 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
723   }
724   llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
725       DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
726   DestElementPHI->addIncoming(DestBegin, EntryBB);
727   Address DestElementCurrent =
728       Address(DestElementPHI, DestAddr.getElementType(),
729               DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
730 
731   // Emit copy.
732   {
733     CodeGenFunction::RunCleanupsScope InitScope(CGF);
734     if (EmitDeclareReductionInit) {
735       emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
736                                        SrcElementCurrent, ElementTy);
737     } else
738       CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
739                            /*IsInitializer=*/false);
740   }
741 
742   if (DRD) {
743     // Shift the address forward by one element.
744     llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
745         SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
746         "omp.arraycpy.dest.element");
747     SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
748   }
749 
750   // Shift the address forward by one element.
751   llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
752       DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
753       "omp.arraycpy.dest.element");
754   // Check whether we've reached the end.
755   llvm::Value *Done =
756       CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
757   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
758   DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
759 
760   // Done.
761   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
762 }
763 
764 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
765   return CGF.EmitOMPSharedLValue(E);
766 }
767 
768 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
769                                             const Expr *E) {
770   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
771     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
772   return LValue();
773 }
774 
775 void ReductionCodeGen::emitAggregateInitialization(
776     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
777     const OMPDeclareReductionDecl *DRD) {
778   // Emit VarDecl with copy init for arrays.
779   // Get the address of the original variable captured in current
780   // captured region.
781   const auto *PrivateVD =
782       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
783   bool EmitDeclareReductionInit =
784       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
785   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
786                        EmitDeclareReductionInit,
787                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
788                                                 : PrivateVD->getInit(),
789                        DRD, SharedAddr);
790 }
791 
792 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
793                                    ArrayRef<const Expr *> Origs,
794                                    ArrayRef<const Expr *> Privates,
795                                    ArrayRef<const Expr *> ReductionOps) {
796   ClausesData.reserve(Shareds.size());
797   SharedAddresses.reserve(Shareds.size());
798   Sizes.reserve(Shareds.size());
799   BaseDecls.reserve(Shareds.size());
800   const auto *IOrig = Origs.begin();
801   const auto *IPriv = Privates.begin();
802   const auto *IRed = ReductionOps.begin();
803   for (const Expr *Ref : Shareds) {
804     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
805     std::advance(IOrig, 1);
806     std::advance(IPriv, 1);
807     std::advance(IRed, 1);
808   }
809 }
810 
811 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
812   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
813          "Number of generated lvalues must be exactly N.");
814   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
815   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
816   SharedAddresses.emplace_back(First, Second);
817   if (ClausesData[N].Shared == ClausesData[N].Ref) {
818     OrigAddresses.emplace_back(First, Second);
819   } else {
820     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
821     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
822     OrigAddresses.emplace_back(First, Second);
823   }
824 }
825 
826 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
827   QualType PrivateType = getPrivateType(N);
828   bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
829   if (!PrivateType->isVariablyModifiedType()) {
830     Sizes.emplace_back(
831         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
832         nullptr);
833     return;
834   }
835   llvm::Value *Size;
836   llvm::Value *SizeInChars;
837   auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
838   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
839   if (AsArraySection) {
840     Size = CGF.Builder.CreatePtrDiff(ElemType,
841                                      OrigAddresses[N].second.getPointer(CGF),
842                                      OrigAddresses[N].first.getPointer(CGF));
843     Size = CGF.Builder.CreateNUWAdd(
844         Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
845     SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
846   } else {
847     SizeInChars =
848         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
849     Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
850   }
851   Sizes.emplace_back(SizeInChars, Size);
852   CodeGenFunction::OpaqueValueMapping OpaqueMap(
853       CGF,
854       cast<OpaqueValueExpr>(
855           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
856       RValue::get(Size));
857   CGF.EmitVariablyModifiedType(PrivateType);
858 }
859 
860 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
861                                          llvm::Value *Size) {
862   QualType PrivateType = getPrivateType(N);
863   if (!PrivateType->isVariablyModifiedType()) {
864     assert(!Size && !Sizes[N].second &&
865            "Size should be nullptr for non-variably modified reduction "
866            "items.");
867     return;
868   }
869   CodeGenFunction::OpaqueValueMapping OpaqueMap(
870       CGF,
871       cast<OpaqueValueExpr>(
872           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
873       RValue::get(Size));
874   CGF.EmitVariablyModifiedType(PrivateType);
875 }
876 
877 void ReductionCodeGen::emitInitialization(
878     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
879     llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
880   assert(SharedAddresses.size() > N && "No variable was generated");
881   const auto *PrivateVD =
882       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
883   const OMPDeclareReductionDecl *DRD =
884       getReductionInit(ClausesData[N].ReductionOp);
885   if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
886     if (DRD && DRD->getInitializer())
887       (void)DefaultInit(CGF);
888     emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
889   } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
890     (void)DefaultInit(CGF);
891     QualType SharedType = SharedAddresses[N].first.getType();
892     emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
893                                      PrivateAddr, SharedAddr, SharedType);
894   } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
895              !CGF.isTrivialInitializer(PrivateVD->getInit())) {
896     CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
897                          PrivateVD->getType().getQualifiers(),
898                          /*IsInitializer=*/false);
899   }
900 }
901 
902 bool ReductionCodeGen::needCleanups(unsigned N) {
903   QualType PrivateType = getPrivateType(N);
904   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
905   return DTorKind != QualType::DK_none;
906 }
907 
908 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
909                                     Address PrivateAddr) {
910   QualType PrivateType = getPrivateType(N);
911   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
912   if (needCleanups(N)) {
913     PrivateAddr = CGF.Builder.CreateElementBitCast(
914         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
915     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
916   }
917 }
918 
/// Strips pointer/reference levels from \p BaseLV until the element type is
/// reached.
///
/// Starting from the non-reference form of \p BaseTy, each pointer or
/// reference level is loaded through, stopping as soon as the current type is
/// neither a pointer nor a reference, or is the same type as \p ElTy.
///
/// \returns an lvalue whose address is the resulting address element-bitcast
/// to the in-memory representation of \p ElTy. The lvalue keeps the type and
/// base info of the last loaded lvalue, with TBAA info obtained via
/// getTBAAInfoForSubobject.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      // Pointer level: load the pointee lvalue.
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      // Reference level: re-type the address as the reference and load
      // through it.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    // Descend one level for the next iteration.
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
938 
/// Rebuilds the pointer/reference indirection of \p BaseTy around \p Addr.
///
/// For every pointer or reference level of (the non-reference form of)
/// \p BaseTy, up to the point where the type matches \p ElTy, a memory
/// temporary is created. Each temporary's pointer is stored into the
/// temporary created for the previous level, forming a chain; \p Addr, cast
/// to the element type of the last temporary, is stored into that last one.
///
/// \returns the first temporary of the chain if any level was processed.
/// Otherwise \p Addr is cast to the type of \p OriginalBaseAddress and that
/// address is returned with its pointer replaced by the cast value.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  // Tmp: temporary for the current level; TopTmp: temporary of the previous
  // level; MostTopTmp: temporary of the first (outermost) level.
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      // Link the previous level to the temporary just created.
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    // Store the adjusted address into the last temporary of the chain.
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  // No indirection levels: just re-type the address.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr);
}
967 
968 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
969   const VarDecl *OrigVD = nullptr;
970   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
971     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
972     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
973       Base = TempOASE->getBase()->IgnoreParenImpCasts();
974     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
975       Base = TempASE->getBase()->IgnoreParenImpCasts();
976     DE = cast<DeclRefExpr>(Base);
977     OrigVD = cast<VarDecl>(DE->getDecl());
978   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
979     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
980     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
981       Base = TempASE->getBase()->IgnoreParenImpCasts();
982     DE = cast<DeclRefExpr>(Base);
983     OrigVD = cast<VarDecl>(DE->getDecl());
984   }
985   return OrigVD;
986 }
987 
988 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
989                                                Address PrivateAddr) {
990   const DeclRefExpr *DE;
991   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
992     BaseDecls.emplace_back(OrigVD);
993     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
994     LValue BaseLValue =
995         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
996                     OriginalBaseLValue);
997     Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
998     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
999         SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
1000         SharedAddr.getPointer());
1001     llvm::Value *PrivatePointer =
1002         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1003             PrivateAddr.getPointer(), SharedAddr.getType());
1004     llvm::Value *Ptr = CGF.Builder.CreateGEP(
1005         SharedAddr.getElementType(), PrivatePointer, Adjustment);
1006     return castToBase(CGF, OrigVD->getType(),
1007                       SharedAddresses[N].first.getType(),
1008                       OriginalBaseLValue.getAddress(CGF), Ptr);
1009   }
1010   BaseDecls.emplace_back(
1011       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1012   return PrivateAddr;
1013 }
1014 
1015 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1016   const OMPDeclareReductionDecl *DRD =
1017       getReductionInit(ClausesData[N].ReductionOp);
1018   return DRD && DRD->getInitializer();
1019 }
1020 
1021 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1022   return CGF.EmitLoadOfPointerLValue(
1023       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1024       getThreadIDVariable()->getType()->castAs<PointerType>());
1025 }
1026 
1027 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
1028   if (!CGF.HaveInsertPoint())
1029     return;
1030   // 1.2.2 OpenMP Language Terminology
1031   // Structured block - An executable statement with a single entry at the
1032   // top and a single exit at the bottom.
1033   // The point of exit cannot be a branch out of the structured block.
1034   // longjmp() and throw() must not violate the entry/exit criteria.
1035   CGF.EHStack.pushTerminate();
1036   if (S)
1037     CGF.incrementProfileCounter(S);
1038   CodeGen(CGF);
1039   CGF.EHStack.popTerminate();
1040 }
1041 
1042 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1043     CodeGenFunction &CGF) {
1044   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1045                             getThreadIDVariable()->getType(),
1046                             AlignmentSource::Decl);
1047 }
1048 
1049 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1050                                        QualType FieldTy) {
1051   auto *Field = FieldDecl::Create(
1052       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1053       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1054       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1055   Field->setAccess(AS_public);
1056   DC->addDecl(Field);
1057   return Field;
1058 }
1059 
1060 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1061                                  StringRef Separator)
1062     : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1063       OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
1064   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1065 
1066   // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
1067   OMPBuilder.initialize();
1068   loadOffloadInfoMetadata();
1069 }
1070 
1071 void CGOpenMPRuntime::clear() {
1072   InternalVars.clear();
1073   // Clean non-target variable declarations possibly used only in debug info.
1074   for (const auto &Data : EmittedNonTargetVariables) {
1075     if (!Data.getValue().pointsToAliveValue())
1076       continue;
1077     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1078     if (!GV)
1079       continue;
1080     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1081       continue;
1082     GV->eraseFromParent();
1083   }
1084 }
1085 
1086 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1087   SmallString<128> Buffer;
1088   llvm::raw_svector_ostream OS(Buffer);
1089   StringRef Sep = FirstSeparator;
1090   for (StringRef Part : Parts) {
1091     OS << Sep << Part;
1092     Sep = Separator;
1093   }
1094   return std::string(OS.str());
1095 }
1096 
/// Emits the internal helper function for a user-defined reduction combiner
/// or initializer.
///
/// \param Ty                  Reduction type; both parameters of the helper
///                            are restrict-qualified pointers to it.
/// \param CombinerInitializer Expression emitted as the body; may be null, in
///                            which case no expression is emitted.
/// \param In                  Variable mapped to the pointee of the helper's
///                            second parameter.
/// \param Out                 Variable mapped to the pointee of the helper's
///                            first parameter.
/// \param IsCombiner          Selects the "omp_combiner" name; otherwise the
///                            "omp_initializer" name is used and a
///                            non-trivial initializer of \p Out is emitted
///                            before \p CombinerInitializer.
/// \returns the newly created function with internal linkage.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  // Note: the "out" parameter is pushed first, the "in" parameter second.
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // When optimizing, replace noinline/optnone with alwaysinline on the
  // helper.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress(CGF));
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress(CGF));
  (void)Scope.Privatize();
  // Initializer helpers first emit the non-trivial initializer of the "out"
  // variable, if it has one.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  // The combiner/initializer expression is evaluated for its side effects
  // only.
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1151 
1152 void CGOpenMPRuntime::emitUserDefinedReduction(
1153     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1154   if (UDRMap.count(D) > 0)
1155     return;
1156   llvm::Function *Combiner = emitCombinerOrInitializer(
1157       CGM, D->getType(), D->getCombiner(),
1158       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1159       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1160       /*IsCombiner=*/true);
1161   llvm::Function *Initializer = nullptr;
1162   if (const Expr *Init = D->getInitializer()) {
1163     Initializer = emitCombinerOrInitializer(
1164         CGM, D->getType(),
1165         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1166                                                                      : nullptr,
1167         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1168         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1169         /*IsCombiner=*/false);
1170   }
1171   UDRMap.try_emplace(D, Combiner, Initializer);
1172   if (CGF) {
1173     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1174     Decls.second.push_back(D);
1175   }
1176 }
1177 
1178 std::pair<llvm::Function *, llvm::Function *>
1179 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1180   auto I = UDRMap.find(D);
1181   if (I != UDRMap.end())
1182     return I->second;
1183   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1184   return UDRMap.lookup(D);
1185 }
1186 
1187 namespace {
1188 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1189 // Builder if one is present.
1190 struct PushAndPopStackRAII {
1191   PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1192                       bool HasCancel, llvm::omp::Directive Kind)
1193       : OMPBuilder(OMPBuilder) {
1194     if (!OMPBuilder)
1195       return;
1196 
1197     // The following callback is the crucial part of clangs cleanup process.
1198     //
1199     // NOTE:
1200     // Once the OpenMPIRBuilder is used to create parallel regions (and
1201     // similar), the cancellation destination (Dest below) is determined via
1202     // IP. That means if we have variables to finalize we split the block at IP,
1203     // use the new block (=BB) as destination to build a JumpDest (via
1204     // getJumpDestInCurrentScope(BB)) which then is fed to
1205     // EmitBranchThroughCleanup. Furthermore, there will not be the need
1206     // to push & pop an FinalizationInfo object.
1207     // The FiniCB will still be needed but at the point where the
1208     // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1209     auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1210       assert(IP.getBlock()->end() == IP.getPoint() &&
1211              "Clang CG should cause non-terminated block!");
1212       CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1213       CGF.Builder.restoreIP(IP);
1214       CodeGenFunction::JumpDest Dest =
1215           CGF.getOMPCancelDestination(OMPD_parallel);
1216       CGF.EmitBranchThroughCleanup(Dest);
1217     };
1218 
1219     // TODO: Remove this once we emit parallel regions through the
1220     //       OpenMPIRBuilder as it can do this setup internally.
1221     llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
1222     OMPBuilder->pushFinalizationCB(std::move(FI));
1223   }
1224   ~PushAndPopStackRAII() {
1225     if (OMPBuilder)
1226       OMPBuilder->popFinalizationCB();
1227   }
1228   llvm::OpenMPIRBuilder *OMPBuilder;
1229 };
1230 } // namespace
1231 
1232 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1233     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1234     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1235     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1236   assert(ThreadIDVar->getType()->isPointerType() &&
1237          "thread id variable must be of type kmp_int32 *");
1238   CodeGenFunction CGF(CGM, true);
1239   bool HasCancel = false;
1240   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1241     HasCancel = OPD->hasCancel();
1242   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1243     HasCancel = OPD->hasCancel();
1244   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1245     HasCancel = OPSD->hasCancel();
1246   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1247     HasCancel = OPFD->hasCancel();
1248   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1249     HasCancel = OPFD->hasCancel();
1250   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1251     HasCancel = OPFD->hasCancel();
1252   else if (const auto *OPFD =
1253                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1254     HasCancel = OPFD->hasCancel();
1255   else if (const auto *OPFD =
1256                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1257     HasCancel = OPFD->hasCancel();
1258 
1259   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1260   //       parallel region to make cancellation barriers work properly.
1261   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1262   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1263   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1264                                     HasCancel, OutlinedHelperName);
1265   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1266   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1267 }
1268 
1269 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1270     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1271     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1272   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1273   return emitParallelOrTeamsOutlinedFunction(
1274       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1275 }
1276 
1277 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1278     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1279     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1280   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1281   return emitParallelOrTeamsOutlinedFunction(
1282       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1283 }
1284 
1285 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1286     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1287     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1288     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1289     bool Tied, unsigned &NumberOfParts) {
1290   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1291                                               PrePostActionTy &) {
1292     llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1293     llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1294     llvm::Value *TaskArgs[] = {
1295         UpLoc, ThreadID,
1296         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1297                                     TaskTVar->getType()->castAs<PointerType>())
1298             .getPointer(CGF)};
1299     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1300                             CGM.getModule(), OMPRTL___kmpc_omp_task),
1301                         TaskArgs);
1302   };
1303   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1304                                                             UntiedCodeGen);
1305   CodeGen.setAction(Action);
1306   assert(!ThreadIDVar->getType()->isPointerType() &&
1307          "thread id variable must be of type kmp_int32 for tasks");
1308   const OpenMPDirectiveKind Region =
1309       isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1310                                                       : OMPD_task;
1311   const CapturedStmt *CS = D.getCapturedStmt(Region);
1312   bool HasCancel = false;
1313   if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1314     HasCancel = TD->hasCancel();
1315   else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1316     HasCancel = TD->hasCancel();
1317   else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1318     HasCancel = TD->hasCancel();
1319   else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1320     HasCancel = TD->hasCancel();
1321 
1322   CodeGenFunction CGF(CGM, true);
1323   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1324                                         InnermostKind, HasCancel, Action);
1325   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1326   llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1327   if (!Tied)
1328     NumberOfParts = Action.getNumberOfParts();
1329   return Res;
1330 }
1331 
1332 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1333                              const RecordDecl *RD, const CGRecordLayout &RL,
1334                              ArrayRef<llvm::Constant *> Data) {
1335   llvm::StructType *StructTy = RL.getLLVMType();
1336   unsigned PrevIdx = 0;
1337   ConstantInitBuilder CIBuilder(CGM);
1338   const auto *DI = Data.begin();
1339   for (const FieldDecl *FD : RD->fields()) {
1340     unsigned Idx = RL.getLLVMFieldNo(FD);
1341     // Fill the alignment.
1342     for (unsigned I = PrevIdx; I < Idx; ++I)
1343       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1344     PrevIdx = Idx + 1;
1345     Fields.add(*DI);
1346     ++DI;
1347   }
1348 }
1349 
1350 template <class... As>
1351 static llvm::GlobalVariable *
1352 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1353                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1354                    As &&... Args) {
1355   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1356   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1357   ConstantInitBuilder CIBuilder(CGM);
1358   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1359   buildStructValue(Fields, CGM, RD, RL, Data);
1360   return Fields.finishAndCreateGlobal(
1361       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1362       std::forward<As>(Args)...);
1363 }
1364 
1365 template <typename T>
1366 static void
1367 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1368                                          ArrayRef<llvm::Constant *> Data,
1369                                          T &Parent) {
1370   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1371   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1372   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1373   buildStructValue(Fields, CGM, RD, RL, Data);
1374   Fields.finishAndAddTo(Parent);
1375 }
1376 
1377 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1378                                              bool AtCurrentPoint) {
1379   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1380   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1381 
1382   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1383   if (AtCurrentPoint) {
1384     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1385         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1386   } else {
1387     Elem.second.ServiceInsertPt =
1388         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1389     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1390   }
1391 }
1392 
1393 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1394   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1395   if (Elem.second.ServiceInsertPt) {
1396     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1397     Elem.second.ServiceInsertPt = nullptr;
1398     Ptr->eraseFromParent();
1399   }
1400 }
1401 
1402 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1403                                                   SourceLocation Loc,
1404                                                   SmallString<128> &Buffer) {
1405   llvm::raw_svector_ostream OS(Buffer);
1406   // Build debug location
1407   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1408   OS << ";" << PLoc.getFilename() << ";";
1409   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1410     OS << FD->getQualifiedNameAsString();
1411   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1412   return OS.str();
1413 }
1414 
1415 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1416                                                  SourceLocation Loc,
1417                                                  unsigned Flags) {
1418   uint32_t SrcLocStrSize;
1419   llvm::Constant *SrcLocStr;
1420   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1421       Loc.isInvalid()) {
1422     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1423   } else {
1424     std::string FunctionName;
1425     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1426       FunctionName = FD->getQualifiedNameAsString();
1427     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1428     const char *FileName = PLoc.getFilename();
1429     unsigned Line = PLoc.getLine();
1430     unsigned Column = PLoc.getColumn();
1431     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1432                                                 Column, SrcLocStrSize);
1433   }
1434   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1435   return OMPBuilder.getOrCreateIdent(
1436       SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1437 }
1438 
1439 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1440                                           SourceLocation Loc) {
1441   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1442   // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
1443   // the clang invariants used below might be broken.
1444   if (CGM.getLangOpts().OpenMPIRBuilder) {
1445     SmallString<128> Buffer;
1446     OMPBuilder.updateToLocation(CGF.Builder.saveIP());
1447     uint32_t SrcLocStrSize;
1448     auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1449         getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
1450     return OMPBuilder.getOrCreateThreadID(
1451         OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
1452   }
1453 
1454   llvm::Value *ThreadID = nullptr;
1455   // Check whether we've already cached a load of the thread id in this
1456   // function.
1457   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1458   if (I != OpenMPLocThreadIDMap.end()) {
1459     ThreadID = I->second.ThreadID;
1460     if (ThreadID != nullptr)
1461       return ThreadID;
1462   }
1463   // If exceptions are enabled, do not use parameter to avoid possible crash.
1464   if (auto *OMPRegionInfo =
1465           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1466     if (OMPRegionInfo->getThreadIDVariable()) {
1467       // Check if this an outlined function with thread id passed as argument.
1468       LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1469       llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1470       if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1471           !CGF.getLangOpts().CXXExceptions ||
1472           CGF.Builder.GetInsertBlock() == TopBlock ||
1473           !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1474           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1475               TopBlock ||
1476           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1477               CGF.Builder.GetInsertBlock()) {
1478         ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1479         // If value loaded in entry block, cache it and use it everywhere in
1480         // function.
1481         if (CGF.Builder.GetInsertBlock() == TopBlock) {
1482           auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1483           Elem.second.ThreadID = ThreadID;
1484         }
1485         return ThreadID;
1486       }
1487     }
1488   }
1489 
1490   // This is not an outlined function region - need to call __kmpc_int32
1491   // kmpc_global_thread_num(ident_t *loc).
1492   // Generate thread id value and cache this value for use across the
1493   // function.
1494   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1495   if (!Elem.second.ServiceInsertPt)
1496     setLocThreadIdInsertPt(CGF);
1497   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1498   CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1499   llvm::CallInst *Call = CGF.Builder.CreateCall(
1500       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1501                                             OMPRTL___kmpc_global_thread_num),
1502       emitUpdateLocation(CGF, Loc));
1503   Call->setCallingConv(CGF.getRuntimeCC());
1504   Elem.second.ThreadID = Call;
1505   return Call;
1506 }
1507 
1508 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1509   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1510   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1511     clearLocThreadIdInsertPt(CGF);
1512     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1513   }
1514   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1515     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1516       UDRMap.erase(D);
1517     FunctionUDRMap.erase(CGF.CurFn);
1518   }
1519   auto I = FunctionUDMMap.find(CGF.CurFn);
1520   if (I != FunctionUDMMap.end()) {
1521     for(const auto *D : I->second)
1522       UDMMap.erase(D);
1523     FunctionUDMMap.erase(I);
1524   }
1525   LastprivateConditionalToTypes.erase(CGF.CurFn);
1526   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1527 }
1528 
1529 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1530   return OMPBuilder.IdentPtr;
1531 }
1532 
1533 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1534   if (!Kmpc_MicroTy) {
1535     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1536     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1537                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1538     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1539   }
1540   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1541 }
1542 
1543 llvm::FunctionCallee
1544 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1545                                              bool IsGPUDistribute) {
1546   assert((IVSize == 32 || IVSize == 64) &&
1547          "IV size is not compatible with the omp runtime");
1548   StringRef Name;
1549   if (IsGPUDistribute)
1550     Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1551                                     : "__kmpc_distribute_static_init_4u")
1552                         : (IVSigned ? "__kmpc_distribute_static_init_8"
1553                                     : "__kmpc_distribute_static_init_8u");
1554   else
1555     Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1556                                     : "__kmpc_for_static_init_4u")
1557                         : (IVSigned ? "__kmpc_for_static_init_8"
1558                                     : "__kmpc_for_static_init_8u");
1559 
1560   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1561   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1562   llvm::Type *TypeParams[] = {
1563     getIdentTyPointerTy(),                     // loc
1564     CGM.Int32Ty,                               // tid
1565     CGM.Int32Ty,                               // schedtype
1566     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1567     PtrTy,                                     // p_lower
1568     PtrTy,                                     // p_upper
1569     PtrTy,                                     // p_stride
1570     ITy,                                       // incr
1571     ITy                                        // chunk
1572   };
1573   auto *FnTy =
1574       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1575   return CGM.CreateRuntimeFunction(FnTy, Name);
1576 }
1577 
1578 llvm::FunctionCallee
1579 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1580   assert((IVSize == 32 || IVSize == 64) &&
1581          "IV size is not compatible with the omp runtime");
1582   StringRef Name =
1583       IVSize == 32
1584           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1585           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1586   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1587   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1588                                CGM.Int32Ty,           // tid
1589                                CGM.Int32Ty,           // schedtype
1590                                ITy,                   // lower
1591                                ITy,                   // upper
1592                                ITy,                   // stride
1593                                ITy                    // chunk
1594   };
1595   auto *FnTy =
1596       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1597   return CGM.CreateRuntimeFunction(FnTy, Name);
1598 }
1599 
1600 llvm::FunctionCallee
1601 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1602   assert((IVSize == 32 || IVSize == 64) &&
1603          "IV size is not compatible with the omp runtime");
1604   StringRef Name =
1605       IVSize == 32
1606           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1607           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1608   llvm::Type *TypeParams[] = {
1609       getIdentTyPointerTy(), // loc
1610       CGM.Int32Ty,           // tid
1611   };
1612   auto *FnTy =
1613       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1614   return CGM.CreateRuntimeFunction(FnTy, Name);
1615 }
1616 
1617 llvm::FunctionCallee
1618 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1619   assert((IVSize == 32 || IVSize == 64) &&
1620          "IV size is not compatible with the omp runtime");
1621   StringRef Name =
1622       IVSize == 32
1623           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1624           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1625   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1626   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1627   llvm::Type *TypeParams[] = {
1628     getIdentTyPointerTy(),                     // loc
1629     CGM.Int32Ty,                               // tid
1630     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1631     PtrTy,                                     // p_lower
1632     PtrTy,                                     // p_upper
1633     PtrTy                                      // p_stride
1634   };
1635   auto *FnTy =
1636       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1637   return CGM.CreateRuntimeFunction(FnTy, Name);
1638 }
1639 
1640 /// Obtain information that uniquely identifies a target entry. This
1641 /// consists of the file and device IDs as well as line number associated with
1642 /// the relevant entry source location.
1643 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1644                                      unsigned &DeviceID, unsigned &FileID,
1645                                      unsigned &LineNum) {
1646   SourceManager &SM = C.getSourceManager();
1647 
1648   // The loc should be always valid and have a file ID (the user cannot use
1649   // #pragma directives in macros)
1650 
1651   assert(Loc.isValid() && "Source location is expected to be always valid.");
1652 
1653   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1654   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1655 
1656   llvm::sys::fs::UniqueID ID;
1657   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1658     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1659     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1660     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1661       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1662           << PLoc.getFilename() << EC.message();
1663   }
1664 
1665   DeviceID = ID.getDevice();
1666   FileID = ID.getFile();
1667   LineNum = PLoc.getLine();
1668 }
1669 
1670 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1671   if (CGM.getLangOpts().OpenMPSimd)
1672     return Address::invalid();
1673   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1674       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1675   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
1676               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1677                HasRequiresUnifiedSharedMemory))) {
1678     SmallString<64> PtrName;
1679     {
1680       llvm::raw_svector_ostream OS(PtrName);
1681       OS << CGM.getMangledName(GlobalDecl(VD));
1682       if (!VD->isExternallyVisible()) {
1683         unsigned DeviceID, FileID, Line;
1684         getTargetEntryUniqueInfo(CGM.getContext(),
1685                                  VD->getCanonicalDecl()->getBeginLoc(),
1686                                  DeviceID, FileID, Line);
1687         OS << llvm::format("_%x", FileID);
1688       }
1689       OS << "_decl_tgt_ref_ptr";
1690     }
1691     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
1692     QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
1693     llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(PtrTy);
1694     if (!Ptr) {
1695       Ptr = getOrCreateInternalVariable(LlvmPtrTy, PtrName);
1696 
1697       auto *GV = cast<llvm::GlobalVariable>(Ptr);
1698       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
1699 
1700       if (!CGM.getLangOpts().OpenMPIsDevice)
1701         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
1702       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
1703     }
1704     return Address(Ptr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1705   }
1706   return Address::invalid();
1707 }
1708 
1709 llvm::Constant *
1710 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1711   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1712          !CGM.getContext().getTargetInfo().isTLSSupported());
1713   // Lookup the entry, lazily creating it if necessary.
1714   std::string Suffix = getName({"cache", ""});
1715   return getOrCreateInternalVariable(
1716       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1717 }
1718 
1719 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1720                                                 const VarDecl *VD,
1721                                                 Address VDAddr,
1722                                                 SourceLocation Loc) {
1723   if (CGM.getLangOpts().OpenMPUseTLS &&
1724       CGM.getContext().getTargetInfo().isTLSSupported())
1725     return VDAddr;
1726 
1727   llvm::Type *VarTy = VDAddr.getElementType();
1728   llvm::Value *Args[] = {
1729       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1730       CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
1731       CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1732       getOrCreateThreadPrivateCache(VD)};
1733   return Address(
1734       CGF.EmitRuntimeCall(
1735           OMPBuilder.getOrCreateRuntimeFunction(
1736               CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1737           Args),
1738       CGF.Int8Ty, VDAddr.getAlignment());
1739 }
1740 
1741 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1742     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1743     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1744   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1745   // library.
1746   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1747   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1748                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1749                       OMPLoc);
1750   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1751   // to register constructor/destructor for variable.
1752   llvm::Value *Args[] = {
1753       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1754       Ctor, CopyCtor, Dtor};
1755   CGF.EmitRuntimeCall(
1756       OMPBuilder.getOrCreateRuntimeFunction(
1757           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1758       Args);
1759 }
1760 
/// Emit the helper functions and the registration code needed to initialize
/// and destroy per-thread copies of the threadprivate variable \p VD when TLS
/// is not used.
///
/// \param VD The threadprivate variable; its definition is looked up first.
/// \param VDAddr Address of the original variable.
/// \param Loc Location used for the generated functions and runtime calls.
/// \param PerformInit If true (and compiling C++), a constructor helper that
///        re-emits the initializer is generated.
/// \param CGF If non-null, the registration calls are emitted into this
///        function; otherwise a dedicated init function is created.
/// \returns The newly created init function when \p CGF is null and a
///          constructor or destructor helper was generated; nullptr in all
///          other cases (TLS in use, no definition, variable already handled,
///          nothing to register, or registration emitted into \p CGF).
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Nothing to do when threadprivate variables are implemented with TLS.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Process each variable (keyed by mangled name) only once.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    // NOTE(review): Init is dereferenced unconditionally in the branch below;
    // this presumably relies on PerformInit implying that an initializer
    // exists - confirm against callers.
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // Single 'void *' parameter: the destination to initialize.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg(ArgVal, CtorCGF.Int8Ty, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Reload the parameter and return it as the helper's result.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // Single 'void *' parameter: the object to destroy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(
          Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Replace a missing constructor/destructor helper with a typed null
    // function pointer.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No function to emit into: wrap the registration calls in a dedicated
      // nullary init function and hand it back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration calls directly into the given function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1881 
/// Emit and register the constructor/destructor offload entries for the
/// declare target variable \p VD.
///
/// \param VD The declare target variable; its definition must exist.
/// \param Addr The global variable holding \p VD.
/// \param PerformInit If true (and compiling C++), a "_ctor" entry is
///        registered.
/// \returns false when no offload targets are configured and this is not a
///          device compilation; otherwise the value of the OpenMPIsDevice
///          language option.
///
/// On the device the entries are real functions that initialize/destroy the
/// variable; on the host they are private i8 placeholder globals carrying the
/// same names.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Only 'to' variables without a unified-shared-memory requirement are
  // handled here.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Process each variable (keyed by mangled name) only once.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  // Buffer holds the common name prefix; Out is scratch storage for the full
  // entry names built from it below.
  SmallString<128> Buffer, Out;
  {
    // Resulting prefix: "__omp_offloading__<device>_<file>_<name>_l<line>"
    // (note the double underscore produced by the two adjacent pieces).
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  // NOTE(review): Init is dereferenced unconditionally in the device branch
  // below; this presumably relies on PerformInit implying that an initializer
  // exists - confirm against callers.
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the declare target variable VD (the global passed in as Addr).
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc, false,
          llvm::GlobalValue::WeakODRLinkage);
      if (CGM.getTriple().isAMDGCN())
        Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      // Access the variable through address space 0, casting if needed.
      llvm::Constant *AddrInAS0 = Addr;
      if (Addr->getAddressSpace() != 0)
        AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
            Addr, llvm::PointerType::getWithSamePointeeType(
                      cast<llvm::PointerType>(Addr->getType()), 0));
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(AddrInAS0, Addr->getValueType(),
                                       CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
    } else {
      // Host side: a private, zero-initialized i8 global stands in for the
      // constructor and also serves as the entry ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the declare target
      // variable VD (the global passed in as Addr).
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc, false,
          llvm::GlobalValue::WeakODRLinkage);
      if (CGM.getTriple().isAMDGCN())
        Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      // Access the variable through address space 0, casting if needed.
      llvm::Constant *AddrInAS0 = Addr;
      if (Addr->getAddressSpace() != 0)
        AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
            Addr, llvm::PointerType::getWithSamePointeeType(
                      cast<llvm::PointerType>(Addr->getType()), 0));
      DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(),
                                  CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
    } else {
      // Host side: a private, zero-initialized i8 global stands in for the
      // destructor and also serves as the entry ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2012 
2013 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2014                                                           QualType VarType,
2015                                                           StringRef Name) {
2016   std::string Suffix = getName({"artificial", ""});
2017   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2018   llvm::GlobalVariable *GAddr =
2019       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2020   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
2021       CGM.getTarget().isTLSSupported()) {
2022     GAddr->setThreadLocal(/*Val=*/true);
2023     return Address(GAddr, GAddr->getValueType(),
2024                    CGM.getContext().getTypeAlignInChars(VarType));
2025   }
2026   std::string CacheSuffix = getName({"cache", ""});
2027   llvm::Value *Args[] = {
2028       emitUpdateLocation(CGF, SourceLocation()),
2029       getThreadID(CGF, SourceLocation()),
2030       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2031       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2032                                 /*isSigned=*/false),
2033       getOrCreateInternalVariable(
2034           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2035   return Address(
2036       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2037           CGF.EmitRuntimeCall(
2038               OMPBuilder.getOrCreateRuntimeFunction(
2039                   CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
2040               Args),
2041           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2042       VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
2043 }
2044 
2045 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2046                                    const RegionCodeGenTy &ThenGen,
2047                                    const RegionCodeGenTy &ElseGen) {
2048   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2049 
2050   // If the condition constant folds and can be elided, try to avoid emitting
2051   // the condition and the dead arm of the if/else.
2052   bool CondConstant;
2053   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2054     if (CondConstant)
2055       ThenGen(CGF);
2056     else
2057       ElseGen(CGF);
2058     return;
2059   }
2060 
2061   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2062   // emit the conditional branch.
2063   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2064   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2065   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2066   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2067 
2068   // Emit the 'then' code.
2069   CGF.EmitBlock(ThenBlock);
2070   ThenGen(CGF);
2071   CGF.EmitBranch(ContBlock);
2072   // Emit the 'else' code if present.
2073   // There is no need to emit line number for unconditional branch.
2074   (void)ApplyDebugLocation::CreateEmpty(CGF);
2075   CGF.EmitBlock(ElseBlock);
2076   ElseGen(CGF);
2077   // There is no need to emit line number for unconditional branch.
2078   (void)ApplyDebugLocation::CreateEmpty(CGF);
2079   CGF.EmitBranch(ContBlock);
2080   // Emit the continuation block for code after the if.
2081   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2082 }
2083 
2084 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2085                                        llvm::Function *OutlinedFn,
2086                                        ArrayRef<llvm::Value *> CapturedVars,
2087                                        const Expr *IfCond,
2088                                        llvm::Value *NumThreads) {
2089   if (!CGF.HaveInsertPoint())
2090     return;
2091   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2092   auto &M = CGM.getModule();
2093   auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2094                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2095     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2096     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2097     llvm::Value *Args[] = {
2098         RTLoc,
2099         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2100         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2101     llvm::SmallVector<llvm::Value *, 16> RealArgs;
2102     RealArgs.append(std::begin(Args), std::end(Args));
2103     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2104 
2105     llvm::FunctionCallee RTLFn =
2106         OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2107     CGF.EmitRuntimeCall(RTLFn, RealArgs);
2108   };
2109   auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2110                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2111     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2112     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2113     // Build calls:
2114     // __kmpc_serialized_parallel(&Loc, GTid);
2115     llvm::Value *Args[] = {RTLoc, ThreadID};
2116     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2117                             M, OMPRTL___kmpc_serialized_parallel),
2118                         Args);
2119 
2120     // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2121     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2122     Address ZeroAddrBound =
2123         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2124                                          /*Name=*/".bound.zero.addr");
2125     CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
2126     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2127     // ThreadId for serialized parallels is 0.
2128     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2129     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2130     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2131 
2132     // Ensure we do not inline the function. This is trivially true for the ones
2133     // passed to __kmpc_fork_call but the ones called in serialized regions
2134     // could be inlined. This is not a perfect but it is closer to the invariant
2135     // we want, namely, every data environment starts with a new function.
2136     // TODO: We should pass the if condition to the runtime function and do the
2137     //       handling there. Much cleaner code.
2138     OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
2139     OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
2140     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2141 
2142     // __kmpc_end_serialized_parallel(&Loc, GTid);
2143     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2144     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2145                             M, OMPRTL___kmpc_end_serialized_parallel),
2146                         EndArgs);
2147   };
2148   if (IfCond) {
2149     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2150   } else {
2151     RegionCodeGenTy ThenRCG(ThenGen);
2152     ThenRCG(CGF);
2153   }
2154 }
2155 
2156 // If we're inside an (outlined) parallel region, use the region info's
2157 // thread-ID variable (it is passed in a first argument of the outlined function
2158 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2159 // regular serial code region, get thread ID by calling kmp_int32
2160 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2161 // return the address of that temp.
2162 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2163                                              SourceLocation Loc) {
2164   if (auto *OMPRegionInfo =
2165           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2166     if (OMPRegionInfo->getThreadIDVariable())
2167       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2168 
2169   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2170   QualType Int32Ty =
2171       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2172   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2173   CGF.EmitStoreOfScalar(ThreadID,
2174                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2175 
2176   return ThreadIDTemp;
2177 }
2178 
2179 llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable(
2180     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2181   SmallString<256> Buffer;
2182   llvm::raw_svector_ostream Out(Buffer);
2183   Out << Name;
2184   StringRef RuntimeName = Out.str();
2185   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2186   if (Elem.second) {
2187     assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) &&
2188            "OMP internal variable has different type than requested");
2189     return &*Elem.second;
2190   }
2191 
2192   return Elem.second = new llvm::GlobalVariable(
2193              CGM.getModule(), Ty, /*IsConstant*/ false,
2194              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2195              Elem.first(), /*InsertBefore=*/nullptr,
2196              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2197 }
2198 
2199 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2200   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2201   std::string Name = getName({Prefix, "var"});
2202   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2203 }
2204 
2205 namespace {
2206 /// Common pre(post)-action for different OpenMP constructs.
2207 class CommonActionTy final : public PrePostActionTy {
2208   llvm::FunctionCallee EnterCallee;
2209   ArrayRef<llvm::Value *> EnterArgs;
2210   llvm::FunctionCallee ExitCallee;
2211   ArrayRef<llvm::Value *> ExitArgs;
2212   bool Conditional;
2213   llvm::BasicBlock *ContBlock = nullptr;
2214 
2215 public:
2216   CommonActionTy(llvm::FunctionCallee EnterCallee,
2217                  ArrayRef<llvm::Value *> EnterArgs,
2218                  llvm::FunctionCallee ExitCallee,
2219                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2220       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2221         ExitArgs(ExitArgs), Conditional(Conditional) {}
2222   void Enter(CodeGenFunction &CGF) override {
2223     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2224     if (Conditional) {
2225       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2226       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2227       ContBlock = CGF.createBasicBlock("omp_if.end");
2228       // Generate the branch (If-stmt)
2229       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2230       CGF.EmitBlock(ThenBlock);
2231     }
2232   }
2233   void Done(CodeGenFunction &CGF) {
2234     // Emit the rest of blocks/branches
2235     CGF.EmitBranch(ContBlock);
2236     CGF.EmitBlock(ContBlock, true);
2237   }
2238   void Exit(CodeGenFunction &CGF) override {
2239     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2240   }
2241 };
2242 } // anonymous namespace
2243 
2244 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2245                                          StringRef CriticalName,
2246                                          const RegionCodeGenTy &CriticalOpGen,
2247                                          SourceLocation Loc, const Expr *Hint) {
2248   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2249   // CriticalOpGen();
2250   // __kmpc_end_critical(ident_t *, gtid, Lock);
2251   // Prepare arguments and build a call to __kmpc_critical
2252   if (!CGF.HaveInsertPoint())
2253     return;
2254   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2255                          getCriticalRegionLock(CriticalName)};
2256   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2257                                                 std::end(Args));
2258   if (Hint) {
2259     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2260         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2261   }
2262   CommonActionTy Action(
2263       OMPBuilder.getOrCreateRuntimeFunction(
2264           CGM.getModule(),
2265           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2266       EnterArgs,
2267       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2268                                             OMPRTL___kmpc_end_critical),
2269       Args);
2270   CriticalOpGen.setAction(Action);
2271   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2272 }
2273 
2274 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2275                                        const RegionCodeGenTy &MasterOpGen,
2276                                        SourceLocation Loc) {
2277   if (!CGF.HaveInsertPoint())
2278     return;
2279   // if(__kmpc_master(ident_t *, gtid)) {
2280   //   MasterOpGen();
2281   //   __kmpc_end_master(ident_t *, gtid);
2282   // }
2283   // Prepare arguments and build a call to __kmpc_master
2284   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2285   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2286                             CGM.getModule(), OMPRTL___kmpc_master),
2287                         Args,
2288                         OMPBuilder.getOrCreateRuntimeFunction(
2289                             CGM.getModule(), OMPRTL___kmpc_end_master),
2290                         Args,
2291                         /*Conditional=*/true);
2292   MasterOpGen.setAction(Action);
2293   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2294   Action.Done(CGF);
2295 }
2296 
2297 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2298                                        const RegionCodeGenTy &MaskedOpGen,
2299                                        SourceLocation Loc, const Expr *Filter) {
2300   if (!CGF.HaveInsertPoint())
2301     return;
2302   // if(__kmpc_masked(ident_t *, gtid, filter)) {
2303   //   MaskedOpGen();
2304   //   __kmpc_end_masked(iden_t *, gtid);
2305   // }
2306   // Prepare arguments and build a call to __kmpc_masked
2307   llvm::Value *FilterVal = Filter
2308                                ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2309                                : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2310   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2311                          FilterVal};
2312   llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2313                             getThreadID(CGF, Loc)};
2314   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2315                             CGM.getModule(), OMPRTL___kmpc_masked),
2316                         Args,
2317                         OMPBuilder.getOrCreateRuntimeFunction(
2318                             CGM.getModule(), OMPRTL___kmpc_end_masked),
2319                         ArgsEnd,
2320                         /*Conditional=*/true);
2321   MaskedOpGen.setAction(Action);
2322   emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2323   Action.Done(CGF);
2324 }
2325 
2326 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2327                                         SourceLocation Loc) {
2328   if (!CGF.HaveInsertPoint())
2329     return;
2330   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2331     OMPBuilder.createTaskyield(CGF.Builder);
2332   } else {
2333     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2334     llvm::Value *Args[] = {
2335         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2336         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2337     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2338                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2339                         Args);
2340   }
2341 
2342   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2343     Region->emitUntiedSwitch(CGF);
2344 }
2345 
2346 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2347                                           const RegionCodeGenTy &TaskgroupOpGen,
2348                                           SourceLocation Loc) {
2349   if (!CGF.HaveInsertPoint())
2350     return;
2351   // __kmpc_taskgroup(ident_t *, gtid);
2352   // TaskgroupOpGen();
2353   // __kmpc_end_taskgroup(ident_t *, gtid);
2354   // Prepare arguments and build a call to __kmpc_taskgroup
2355   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2356   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2357                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2358                         Args,
2359                         OMPBuilder.getOrCreateRuntimeFunction(
2360                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2361                         Args);
2362   TaskgroupOpGen.setAction(Action);
2363   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2364 }
2365 
2366 /// Given an array of pointers to variables, project the address of a
2367 /// given variable.
2368 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2369                                       unsigned Index, const VarDecl *Var) {
2370   // Pull out the pointer to the variable.
2371   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2372   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2373 
2374   llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2375   return Address(
2376       CGF.Builder.CreateBitCast(
2377           Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
2378       ElemTy, CGF.getContext().getDeclAlign(Var));
2379 }
2380 
2381 static llvm::Value *emitCopyprivateCopyFunction(
2382     CodeGenModule &CGM, llvm::Type *ArgsElemType,
2383     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2384     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2385     SourceLocation Loc) {
2386   ASTContext &C = CGM.getContext();
2387   // void copy_func(void *LHSArg, void *RHSArg);
2388   FunctionArgList Args;
2389   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2390                            ImplicitParamDecl::Other);
2391   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2392                            ImplicitParamDecl::Other);
2393   Args.push_back(&LHSArg);
2394   Args.push_back(&RHSArg);
2395   const auto &CGFI =
2396       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2397   std::string Name =
2398       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2399   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2400                                     llvm::GlobalValue::InternalLinkage, Name,
2401                                     &CGM.getModule());
2402   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2403   Fn->setDoesNotRecurse();
2404   CodeGenFunction CGF(CGM);
2405   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2406   // Dest = (void*[n])(LHSArg);
2407   // Src = (void*[n])(RHSArg);
2408   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2409                   CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2410                   ArgsElemType->getPointerTo()),
2411               ArgsElemType, CGF.getPointerAlign());
2412   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2413                   CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2414                   ArgsElemType->getPointerTo()),
2415               ArgsElemType, CGF.getPointerAlign());
2416   // *(Type0*)Dst[0] = *(Type0*)Src[0];
2417   // *(Type1*)Dst[1] = *(Type1*)Src[1];
2418   // ...
2419   // *(Typen*)Dst[n] = *(Typen*)Src[n];
2420   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2421     const auto *DestVar =
2422         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2423     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2424 
2425     const auto *SrcVar =
2426         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2427     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2428 
2429     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2430     QualType Type = VD->getType();
2431     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2432   }
2433   CGF.FinishFunction();
2434   return Fn;
2435 }
2436 
2437 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2438                                        const RegionCodeGenTy &SingleOpGen,
2439                                        SourceLocation Loc,
2440                                        ArrayRef<const Expr *> CopyprivateVars,
2441                                        ArrayRef<const Expr *> SrcExprs,
2442                                        ArrayRef<const Expr *> DstExprs,
2443                                        ArrayRef<const Expr *> AssignmentOps) {
2444   if (!CGF.HaveInsertPoint())
2445     return;
2446   assert(CopyprivateVars.size() == SrcExprs.size() &&
2447          CopyprivateVars.size() == DstExprs.size() &&
2448          CopyprivateVars.size() == AssignmentOps.size());
2449   ASTContext &C = CGM.getContext();
2450   // int32 did_it = 0;
2451   // if(__kmpc_single(ident_t *, gtid)) {
2452   //   SingleOpGen();
2453   //   __kmpc_end_single(ident_t *, gtid);
2454   //   did_it = 1;
2455   // }
2456   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2457   // <copy_func>, did_it);
2458 
2459   Address DidIt = Address::invalid();
2460   if (!CopyprivateVars.empty()) {
2461     // int32 did_it = 0;
2462     QualType KmpInt32Ty =
2463         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2464     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2465     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2466   }
2467   // Prepare arguments and build a call to __kmpc_single
2468   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2469   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2470                             CGM.getModule(), OMPRTL___kmpc_single),
2471                         Args,
2472                         OMPBuilder.getOrCreateRuntimeFunction(
2473                             CGM.getModule(), OMPRTL___kmpc_end_single),
2474                         Args,
2475                         /*Conditional=*/true);
2476   SingleOpGen.setAction(Action);
2477   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2478   if (DidIt.isValid()) {
2479     // did_it = 1;
2480     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2481   }
2482   Action.Done(CGF);
2483   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2484   // <copy_func>, did_it);
2485   if (DidIt.isValid()) {
2486     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2487     QualType CopyprivateArrayTy = C.getConstantArrayType(
2488         C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2489         /*IndexTypeQuals=*/0);
2490     // Create a list of all private variables for copyprivate.
2491     Address CopyprivateList =
2492         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2493     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2494       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2495       CGF.Builder.CreateStore(
2496           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2497               CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2498               CGF.VoidPtrTy),
2499           Elem);
2500     }
2501     // Build function that copies private values from single region to all other
2502     // threads in the corresponding parallel region.
2503     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2504         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2505         SrcExprs, DstExprs, AssignmentOps, Loc);
2506     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2507     Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2508         CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2509     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2510     llvm::Value *Args[] = {
2511         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2512         getThreadID(CGF, Loc),        // i32 <gtid>
2513         BufSize,                      // size_t <buf_size>
2514         CL.getPointer(),              // void *<copyprivate list>
2515         CpyFn,                        // void (*) (void *, void *) <copy_func>
2516         DidItVal                      // i32 did_it
2517     };
2518     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2519                             CGM.getModule(), OMPRTL___kmpc_copyprivate),
2520                         Args);
2521   }
2522 }
2523 
2524 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2525                                         const RegionCodeGenTy &OrderedOpGen,
2526                                         SourceLocation Loc, bool IsThreads) {
2527   if (!CGF.HaveInsertPoint())
2528     return;
2529   // __kmpc_ordered(ident_t *, gtid);
2530   // OrderedOpGen();
2531   // __kmpc_end_ordered(ident_t *, gtid);
2532   // Prepare arguments and build a call to __kmpc_ordered
2533   if (IsThreads) {
2534     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2535     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2536                               CGM.getModule(), OMPRTL___kmpc_ordered),
2537                           Args,
2538                           OMPBuilder.getOrCreateRuntimeFunction(
2539                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2540                           Args);
2541     OrderedOpGen.setAction(Action);
2542     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2543     return;
2544   }
2545   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2546 }
2547 
2548 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2549   unsigned Flags;
2550   if (Kind == OMPD_for)
2551     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2552   else if (Kind == OMPD_sections)
2553     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2554   else if (Kind == OMPD_single)
2555     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2556   else if (Kind == OMPD_barrier)
2557     Flags = OMP_IDENT_BARRIER_EXPL;
2558   else
2559     Flags = OMP_IDENT_BARRIER_IMPL;
2560   return Flags;
2561 }
2562 
2563 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2564     CodeGenFunction &CGF, const OMPLoopDirective &S,
2565     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2566   // Check if the loop directive is actually a doacross loop directive. In this
2567   // case choose static, 1 schedule.
2568   if (llvm::any_of(
2569           S.getClausesOfKind<OMPOrderedClause>(),
2570           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2571     ScheduleKind = OMPC_SCHEDULE_static;
2572     // Chunk size is 1 in this case.
2573     llvm::APInt ChunkSize(32, 1);
2574     ChunkExpr = IntegerLiteral::Create(
2575         CGF.getContext(), ChunkSize,
2576         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2577         SourceLocation());
2578   }
2579 }
2580 
2581 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2582                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2583                                       bool ForceSimpleCall) {
2584   // Check if we should use the OMPBuilder
2585   auto *OMPRegionInfo =
2586       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2587   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2588     CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2589         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2590     return;
2591   }
2592 
2593   if (!CGF.HaveInsertPoint())
2594     return;
2595   // Build call __kmpc_cancel_barrier(loc, thread_id);
2596   // Build call __kmpc_barrier(loc, thread_id);
2597   unsigned Flags = getDefaultFlagsForBarriers(Kind);
2598   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2599   // thread_id);
2600   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2601                          getThreadID(CGF, Loc)};
2602   if (OMPRegionInfo) {
2603     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2604       llvm::Value *Result = CGF.EmitRuntimeCall(
2605           OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2606                                                 OMPRTL___kmpc_cancel_barrier),
2607           Args);
2608       if (EmitChecks) {
2609         // if (__kmpc_cancel_barrier()) {
2610         //   exit from construct;
2611         // }
2612         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2613         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2614         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2615         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2616         CGF.EmitBlock(ExitBB);
2617         //   exit from construct;
2618         CodeGenFunction::JumpDest CancelDestination =
2619             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2620         CGF.EmitBranchThroughCleanup(CancelDestination);
2621         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2622       }
2623       return;
2624     }
2625   }
2626   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2627                           CGM.getModule(), OMPRTL___kmpc_barrier),
2628                       Args);
2629 }
2630 
2631 /// Map the OpenMP loop schedule to the runtime enumeration.
2632 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2633                                           bool Chunked, bool Ordered) {
2634   switch (ScheduleKind) {
2635   case OMPC_SCHEDULE_static:
2636     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2637                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2638   case OMPC_SCHEDULE_dynamic:
2639     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2640   case OMPC_SCHEDULE_guided:
2641     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2642   case OMPC_SCHEDULE_runtime:
2643     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2644   case OMPC_SCHEDULE_auto:
2645     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2646   case OMPC_SCHEDULE_unknown:
2647     assert(!Chunked && "chunk was specified but schedule kind not known");
2648     return Ordered ? OMP_ord_static : OMP_sch_static;
2649   }
2650   llvm_unreachable("Unexpected runtime schedule");
2651 }
2652 
2653 /// Map the OpenMP distribute schedule to the runtime enumeration.
2654 static OpenMPSchedType
2655 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2656   // only static is allowed for dist_schedule
2657   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2658 }
2659 
2660 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2661                                          bool Chunked) const {
2662   OpenMPSchedType Schedule =
2663       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2664   return Schedule == OMP_sch_static;
2665 }
2666 
2667 bool CGOpenMPRuntime::isStaticNonchunked(
2668     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2669   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2670   return Schedule == OMP_dist_sch_static;
2671 }
2672 
2673 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2674                                       bool Chunked) const {
2675   OpenMPSchedType Schedule =
2676       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2677   return Schedule == OMP_sch_static_chunked;
2678 }
2679 
2680 bool CGOpenMPRuntime::isStaticChunked(
2681     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2682   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2683   return Schedule == OMP_dist_sch_static_chunked;
2684 }
2685 
2686 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2687   OpenMPSchedType Schedule =
2688       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2689   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2690   return Schedule != OMP_sch_static;
2691 }
2692 
2693 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2694                                   OpenMPScheduleClauseModifier M1,
2695                                   OpenMPScheduleClauseModifier M2) {
2696   int Modifier = 0;
2697   switch (M1) {
2698   case OMPC_SCHEDULE_MODIFIER_monotonic:
2699     Modifier = OMP_sch_modifier_monotonic;
2700     break;
2701   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2702     Modifier = OMP_sch_modifier_nonmonotonic;
2703     break;
2704   case OMPC_SCHEDULE_MODIFIER_simd:
2705     if (Schedule == OMP_sch_static_chunked)
2706       Schedule = OMP_sch_static_balanced_chunked;
2707     break;
2708   case OMPC_SCHEDULE_MODIFIER_last:
2709   case OMPC_SCHEDULE_MODIFIER_unknown:
2710     break;
2711   }
2712   switch (M2) {
2713   case OMPC_SCHEDULE_MODIFIER_monotonic:
2714     Modifier = OMP_sch_modifier_monotonic;
2715     break;
2716   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2717     Modifier = OMP_sch_modifier_nonmonotonic;
2718     break;
2719   case OMPC_SCHEDULE_MODIFIER_simd:
2720     if (Schedule == OMP_sch_static_chunked)
2721       Schedule = OMP_sch_static_balanced_chunked;
2722     break;
2723   case OMPC_SCHEDULE_MODIFIER_last:
2724   case OMPC_SCHEDULE_MODIFIER_unknown:
2725     break;
2726   }
2727   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2728   // If the static schedule kind is specified or if the ordered clause is
2729   // specified, and if the nonmonotonic modifier is not specified, the effect is
2730   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2731   // modifier is specified, the effect is as if the nonmonotonic modifier is
2732   // specified.
2733   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2734     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2735           Schedule == OMP_sch_static_balanced_chunked ||
2736           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2737           Schedule == OMP_dist_sch_static_chunked ||
2738           Schedule == OMP_dist_sch_static))
2739       Modifier = OMP_sch_modifier_nonmonotonic;
2740   }
2741   return Schedule | Modifier;
2742 }
2743 
2744 void CGOpenMPRuntime::emitForDispatchInit(
2745     CodeGenFunction &CGF, SourceLocation Loc,
2746     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2747     bool Ordered, const DispatchRTInput &DispatchValues) {
2748   if (!CGF.HaveInsertPoint())
2749     return;
2750   OpenMPSchedType Schedule = getRuntimeSchedule(
2751       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2752   assert(Ordered ||
2753          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2754           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2755           Schedule != OMP_sch_static_balanced_chunked));
2756   // Call __kmpc_dispatch_init(
2757   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2758   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2759   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2760 
2761   // If the Chunk was not specified in the clause - use default value 1.
2762   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2763                                             : CGF.Builder.getIntN(IVSize, 1);
2764   llvm::Value *Args[] = {
2765       emitUpdateLocation(CGF, Loc),
2766       getThreadID(CGF, Loc),
2767       CGF.Builder.getInt32(addMonoNonMonoModifier(
2768           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2769       DispatchValues.LB,                                     // Lower
2770       DispatchValues.UB,                                     // Upper
2771       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2772       Chunk                                                  // Chunk
2773   };
2774   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2775 }
2776 
2777 static void emitForStaticInitCall(
2778     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2779     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2780     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2781     const CGOpenMPRuntime::StaticRTInput &Values) {
2782   if (!CGF.HaveInsertPoint())
2783     return;
2784 
2785   assert(!Values.Ordered);
2786   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2787          Schedule == OMP_sch_static_balanced_chunked ||
2788          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2789          Schedule == OMP_dist_sch_static ||
2790          Schedule == OMP_dist_sch_static_chunked);
2791 
2792   // Call __kmpc_for_static_init(
2793   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2794   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2795   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2796   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2797   llvm::Value *Chunk = Values.Chunk;
2798   if (Chunk == nullptr) {
2799     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2800             Schedule == OMP_dist_sch_static) &&
2801            "expected static non-chunked schedule");
2802     // If the Chunk was not specified in the clause - use default value 1.
2803     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2804   } else {
2805     assert((Schedule == OMP_sch_static_chunked ||
2806             Schedule == OMP_sch_static_balanced_chunked ||
2807             Schedule == OMP_ord_static_chunked ||
2808             Schedule == OMP_dist_sch_static_chunked) &&
2809            "expected static chunked schedule");
2810   }
2811   llvm::Value *Args[] = {
2812       UpdateLocation,
2813       ThreadId,
2814       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2815                                                   M2)), // Schedule type
2816       Values.IL.getPointer(),                           // &isLastIter
2817       Values.LB.getPointer(),                           // &LB
2818       Values.UB.getPointer(),                           // &UB
2819       Values.ST.getPointer(),                           // &Stride
2820       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2821       Chunk                                             // Chunk
2822   };
2823   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2824 }
2825 
2826 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2827                                         SourceLocation Loc,
2828                                         OpenMPDirectiveKind DKind,
2829                                         const OpenMPScheduleTy &ScheduleKind,
2830                                         const StaticRTInput &Values) {
2831   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2832       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2833   assert(isOpenMPWorksharingDirective(DKind) &&
2834          "Expected loop-based or sections-based directive.");
2835   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2836                                              isOpenMPLoopDirective(DKind)
2837                                                  ? OMP_IDENT_WORK_LOOP
2838                                                  : OMP_IDENT_WORK_SECTIONS);
2839   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2840   llvm::FunctionCallee StaticInitFunction =
2841       createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
2842   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2843   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2844                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2845 }
2846 
2847 void CGOpenMPRuntime::emitDistributeStaticInit(
2848     CodeGenFunction &CGF, SourceLocation Loc,
2849     OpenMPDistScheduleClauseKind SchedKind,
2850     const CGOpenMPRuntime::StaticRTInput &Values) {
2851   OpenMPSchedType ScheduleNum =
2852       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2853   llvm::Value *UpdatedLocation =
2854       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2855   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2856   llvm::FunctionCallee StaticInitFunction;
2857   bool isGPUDistribute =
2858       CGM.getLangOpts().OpenMPIsDevice &&
2859       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2860   StaticInitFunction = createForStaticInitFunction(
2861       Values.IVSize, Values.IVSigned, isGPUDistribute);
2862 
2863   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2864                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2865                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2866 }
2867 
2868 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2869                                           SourceLocation Loc,
2870                                           OpenMPDirectiveKind DKind) {
2871   if (!CGF.HaveInsertPoint())
2872     return;
2873   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2874   llvm::Value *Args[] = {
2875       emitUpdateLocation(CGF, Loc,
2876                          isOpenMPDistributeDirective(DKind)
2877                              ? OMP_IDENT_WORK_DISTRIBUTE
2878                              : isOpenMPLoopDirective(DKind)
2879                                    ? OMP_IDENT_WORK_LOOP
2880                                    : OMP_IDENT_WORK_SECTIONS),
2881       getThreadID(CGF, Loc)};
2882   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2883   if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2884       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2885     CGF.EmitRuntimeCall(
2886         OMPBuilder.getOrCreateRuntimeFunction(
2887             CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2888         Args);
2889   else
2890     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2891                             CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2892                         Args);
2893 }
2894 
2895 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2896                                                  SourceLocation Loc,
2897                                                  unsigned IVSize,
2898                                                  bool IVSigned) {
2899   if (!CGF.HaveInsertPoint())
2900     return;
2901   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2902   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2903   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2904 }
2905 
2906 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2907                                           SourceLocation Loc, unsigned IVSize,
2908                                           bool IVSigned, Address IL,
2909                                           Address LB, Address UB,
2910                                           Address ST) {
2911   // Call __kmpc_dispatch_next(
2912   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2913   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2914   //          kmp_int[32|64] *p_stride);
2915   llvm::Value *Args[] = {
2916       emitUpdateLocation(CGF, Loc),
2917       getThreadID(CGF, Loc),
2918       IL.getPointer(), // &isLastIter
2919       LB.getPointer(), // &Lower
2920       UB.getPointer(), // &Upper
2921       ST.getPointer()  // &Stride
2922   };
2923   llvm::Value *Call =
2924       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2925   return CGF.EmitScalarConversion(
2926       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2927       CGF.getContext().BoolTy, Loc);
2928 }
2929 
2930 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2931                                            llvm::Value *NumThreads,
2932                                            SourceLocation Loc) {
2933   if (!CGF.HaveInsertPoint())
2934     return;
2935   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2936   llvm::Value *Args[] = {
2937       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2938       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2939   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2940                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2941                       Args);
2942 }
2943 
2944 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2945                                          ProcBindKind ProcBind,
2946                                          SourceLocation Loc) {
2947   if (!CGF.HaveInsertPoint())
2948     return;
2949   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2950   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2951   llvm::Value *Args[] = {
2952       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2953       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2954   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2955                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2956                       Args);
2957 }
2958 
2959 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2960                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2961   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2962     OMPBuilder.createFlush(CGF.Builder);
2963   } else {
2964     if (!CGF.HaveInsertPoint())
2965       return;
2966     // Build call void __kmpc_flush(ident_t *loc)
2967     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2968                             CGM.getModule(), OMPRTL___kmpc_flush),
2969                         emitUpdateLocation(CGF, Loc));
2970   }
2971 }
2972 
namespace {
/// Indexes of fields for type kmp_task_t. The values are spelled out
/// explicitly; they are consecutive and start at zero.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds = 0,
  /// Task routine.
  KmpTaskTRoutine = 1,
  /// Partition id for the untied tasks.
  KmpTaskTPartId = 2,
  /// Function with call of destructors for private variables.
  Data1 = 3,
  /// Task priority.
  Data2 = 4,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound = 5,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound = 6,
  /// (Taskloops only) Stride.
  KmpTaskTStride = 7,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter = 8,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions = 9,
};
} // anonymous namespace
2998 
2999 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3000   return OffloadEntriesTargetRegion.empty() &&
3001          OffloadEntriesDeviceGlobalVar.empty();
3002 }
3003 
3004 /// Initialize target region entry.
3005 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3006     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3007                                     StringRef ParentName, unsigned LineNum,
3008                                     unsigned Order) {
3009   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3010                                              "only required for the device "
3011                                              "code generation.");
3012   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3013       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3014                                    OMPTargetRegionEntryTargetRegion);
3015   ++OffloadingEntriesNum;
3016 }
3017 
3018 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3019     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3020                                   StringRef ParentName, unsigned LineNum,
3021                                   llvm::Constant *Addr, llvm::Constant *ID,
3022                                   OMPTargetRegionEntryKind Flags) {
3023   // If we are emitting code for a target, the entry is already initialized,
3024   // only has to be registered.
3025   if (CGM.getLangOpts().OpenMPIsDevice) {
3026     // This could happen if the device compilation is invoked standalone.
3027     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
3028       return;
3029     auto &Entry =
3030         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3031     Entry.setAddress(Addr);
3032     Entry.setID(ID);
3033     Entry.setFlags(Flags);
3034   } else {
3035     if (Flags ==
3036             OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
3037         hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
3038                                  /*IgnoreAddressId*/ true))
3039       return;
3040     assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
3041            "Target region entry already registered!");
3042     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3043     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3044     ++OffloadingEntriesNum;
3045   }
3046 }
3047 
3048 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3049     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3050     bool IgnoreAddressId) const {
3051   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3052   if (PerDevice == OffloadEntriesTargetRegion.end())
3053     return false;
3054   auto PerFile = PerDevice->second.find(FileID);
3055   if (PerFile == PerDevice->second.end())
3056     return false;
3057   auto PerParentName = PerFile->second.find(ParentName);
3058   if (PerParentName == PerFile->second.end())
3059     return false;
3060   auto PerLine = PerParentName->second.find(LineNum);
3061   if (PerLine == PerParentName->second.end())
3062     return false;
3063   // Fail if this entry is already registered.
3064   if (!IgnoreAddressId &&
3065       (PerLine->second.getAddress() || PerLine->second.getID()))
3066     return false;
3067   return true;
3068 }
3069 
3070 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3071     const OffloadTargetRegionEntryInfoActTy &Action) {
3072   // Scan all target region entries and perform the provided action.
3073   for (const auto &D : OffloadEntriesTargetRegion)
3074     for (const auto &F : D.second)
3075       for (const auto &P : F.second)
3076         for (const auto &L : P.second)
3077           Action(D.first, F.first, P.first(), L.first, L.second);
3078 }
3079 
3080 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3081     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3082                                        OMPTargetGlobalVarEntryKind Flags,
3083                                        unsigned Order) {
3084   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3085                                              "only required for the device "
3086                                              "code generation.");
3087   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3088   ++OffloadingEntriesNum;
3089 }
3090 
3091 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3092     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3093                                      CharUnits VarSize,
3094                                      OMPTargetGlobalVarEntryKind Flags,
3095                                      llvm::GlobalValue::LinkageTypes Linkage) {
3096   if (CGM.getLangOpts().OpenMPIsDevice) {
3097     // This could happen if the device compilation is invoked standalone.
3098     if (!hasDeviceGlobalVarEntryInfo(VarName))
3099       return;
3100     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3101     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3102       if (Entry.getVarSize().isZero()) {
3103         Entry.setVarSize(VarSize);
3104         Entry.setLinkage(Linkage);
3105       }
3106       return;
3107     }
3108     Entry.setVarSize(VarSize);
3109     Entry.setLinkage(Linkage);
3110     Entry.setAddress(Addr);
3111   } else {
3112     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3113       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3114       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3115              "Entry not initialized!");
3116       if (Entry.getVarSize().isZero()) {
3117         Entry.setVarSize(VarSize);
3118         Entry.setLinkage(Linkage);
3119       }
3120       return;
3121     }
3122     OffloadEntriesDeviceGlobalVar.try_emplace(
3123         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3124     ++OffloadingEntriesNum;
3125   }
3126 }
3127 
3128 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3129     actOnDeviceGlobalVarEntriesInfo(
3130         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3131   // Scan all target region entries and perform the provided action.
3132   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3133     Action(E.getKey(), E.getValue());
3134 }
3135 
3136 void CGOpenMPRuntime::createOffloadEntry(
3137     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3138     llvm::GlobalValue::LinkageTypes Linkage) {
3139   StringRef Name = Addr->getName();
3140   llvm::Module &M = CGM.getModule();
3141   llvm::LLVMContext &C = M.getContext();
3142 
3143   // Create constant string with the name.
3144   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3145 
3146   std::string StringName = getName({"omp_offloading", "entry_name"});
3147   auto *Str = new llvm::GlobalVariable(
3148       M, StrPtrInit->getType(), /*isConstant=*/true,
3149       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3150   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3151 
3152   llvm::Constant *Data[] = {
3153       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3154       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3155       llvm::ConstantInt::get(CGM.SizeTy, Size),
3156       llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3157       llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3158   std::string EntryName = getName({"omp_offloading", "entry", ""});
3159   llvm::GlobalVariable *Entry = createGlobalStruct(
3160       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3161       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3162 
3163   // The entry has to be created in the section the linker expects it to be.
3164   Entry->setSection("omp_offloading_entries");
3165 }
3166 
3167 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3168   // Emit the offloading entries and metadata so that the device codegen side
3169   // can easily figure out what to emit. The produced metadata looks like
3170   // this:
3171   //
3172   // !omp_offload.info = !{!1, ...}
3173   //
3174   // Right now we only generate metadata for function that contain target
3175   // regions.
3176 
3177   // If we are in simd mode or there are no entries, we don't need to do
3178   // anything.
3179   if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
3180     return;
3181 
3182   llvm::Module &M = CGM.getModule();
3183   llvm::LLVMContext &C = M.getContext();
3184   SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
3185                          SourceLocation, StringRef>,
3186               16>
3187       OrderedEntries(OffloadEntriesInfoManager.size());
3188   llvm::SmallVector<StringRef, 16> ParentFunctions(
3189       OffloadEntriesInfoManager.size());
3190 
3191   // Auxiliary methods to create metadata values and strings.
3192   auto &&GetMDInt = [this](unsigned V) {
3193     return llvm::ConstantAsMetadata::get(
3194         llvm::ConstantInt::get(CGM.Int32Ty, V));
3195   };
3196 
3197   auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3198 
3199   // Create the offloading info metadata node.
3200   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3201 
3202   // Create function that emits metadata for each target region entry;
3203   auto &&TargetRegionMetadataEmitter =
3204       [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
3205        &GetMDString](
3206           unsigned DeviceID, unsigned FileID, StringRef ParentName,
3207           unsigned Line,
3208           const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
3209         // Generate metadata for target regions. Each entry of this metadata
3210         // contains:
3211         // - Entry 0 -> Kind of this type of metadata (0).
3212         // - Entry 1 -> Device ID of the file where the entry was identified.
3213         // - Entry 2 -> File ID of the file where the entry was identified.
3214         // - Entry 3 -> Mangled name of the function where the entry was
3215         // identified.
3216         // - Entry 4 -> Line in the file where the entry was identified.
3217         // - Entry 5 -> Order the entry was created.
3218         // The first element of the metadata node is the kind.
3219         llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
3220                                  GetMDInt(FileID),      GetMDString(ParentName),
3221                                  GetMDInt(Line),        GetMDInt(E.getOrder())};
3222 
3223         SourceLocation Loc;
3224         for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
3225                   E = CGM.getContext().getSourceManager().fileinfo_end();
3226              I != E; ++I) {
3227           if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
3228               I->getFirst()->getUniqueID().getFile() == FileID) {
3229             Loc = CGM.getContext().getSourceManager().translateFileLineCol(
3230                 I->getFirst(), Line, 1);
3231             break;
3232           }
3233         }
3234         // Save this entry in the right position of the ordered entries array.
3235         OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
3236         ParentFunctions[E.getOrder()] = ParentName;
3237 
3238         // Add metadata to the named metadata node.
3239         MD->addOperand(llvm::MDNode::get(C, Ops));
3240       };
3241 
3242   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3243       TargetRegionMetadataEmitter);
3244 
3245   // Create function that emits metadata for each device global variable entry;
3246   auto &&DeviceGlobalVarMetadataEmitter =
3247       [&C, &OrderedEntries, &GetMDInt, &GetMDString,
3248        MD](StringRef MangledName,
3249            const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
3250                &E) {
3251         // Generate metadata for global variables. Each entry of this metadata
3252         // contains:
3253         // - Entry 0 -> Kind of this type of metadata (1).
3254         // - Entry 1 -> Mangled name of the variable.
3255         // - Entry 2 -> Declare target kind.
3256         // - Entry 3 -> Order the entry was created.
3257         // The first element of the metadata node is the kind.
3258         llvm::Metadata *Ops[] = {
3259             GetMDInt(E.getKind()), GetMDString(MangledName),
3260             GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3261 
3262         // Save this entry in the right position of the ordered entries array.
3263         OrderedEntries[E.getOrder()] =
3264             std::make_tuple(&E, SourceLocation(), MangledName);
3265 
3266         // Add metadata to the named metadata node.
3267         MD->addOperand(llvm::MDNode::get(C, Ops));
3268       };
3269 
3270   OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
3271       DeviceGlobalVarMetadataEmitter);
3272 
3273   for (const auto &E : OrderedEntries) {
3274     assert(std::get<0>(E) && "All ordered entries must exist!");
3275     if (const auto *CE =
3276             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3277                 std::get<0>(E))) {
3278       if (!CE->getID() || !CE->getAddress()) {
3279         // Do not blame the entry if the parent funtion is not emitted.
3280         StringRef FnName = ParentFunctions[CE->getOrder()];
3281         if (!CGM.GetGlobalValue(FnName))
3282           continue;
3283         unsigned DiagID = CGM.getDiags().getCustomDiagID(
3284             DiagnosticsEngine::Error,
3285             "Offloading entry for target region in %0 is incorrect: either the "
3286             "address or the ID is invalid.");
3287         CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
3288         continue;
3289       }
3290       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3291                          CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3292     } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
3293                                              OffloadEntryInfoDeviceGlobalVar>(
3294                    std::get<0>(E))) {
3295       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
3296           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3297               CE->getFlags());
3298       switch (Flags) {
3299       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
3300         if (CGM.getLangOpts().OpenMPIsDevice &&
3301             CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
3302           continue;
3303         if (!CE->getAddress()) {
3304           unsigned DiagID = CGM.getDiags().getCustomDiagID(
3305               DiagnosticsEngine::Error, "Offloading entry for declare target "
3306                                         "variable %0 is incorrect: the "
3307                                         "address is invalid.");
3308           CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
3309           continue;
3310         }
3311         // The vaiable has no definition - no need to add the entry.
3312         if (CE->getVarSize().isZero())
3313           continue;
3314         break;
3315       }
3316       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
3317         assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
3318                 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
3319                "Declaret target link address is set.");
3320         if (CGM.getLangOpts().OpenMPIsDevice)
3321           continue;
3322         if (!CE->getAddress()) {
3323           unsigned DiagID = CGM.getDiags().getCustomDiagID(
3324               DiagnosticsEngine::Error,
3325               "Offloading entry for declare target variable is incorrect: the "
3326               "address is invalid.");
3327           CGM.getDiags().Report(DiagID);
3328           continue;
3329         }
3330         break;
3331       }
3332 
3333       // Hidden or internal symbols on the device are not externally visible. We
3334       // should not attempt to register them by creating an offloading entry.
3335       if (auto *GV = dyn_cast<llvm::GlobalValue>(CE->getAddress()))
3336         if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
3337           continue;
3338 
3339       createOffloadEntry(CE->getAddress(), CE->getAddress(),
3340                          CE->getVarSize().getQuantity(), Flags,
3341                          CE->getLinkage());
3342     } else {
3343       llvm_unreachable("Unsupported entry kind.");
3344     }
3345   }
3346 }
3347 
3348 /// Loads all the offload entries information from the host IR
3349 /// metadata.
3350 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3351   // If we are in target mode, load the metadata from the host IR. This code has
3352   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
3353 
3354   if (!CGM.getLangOpts().OpenMPIsDevice)
3355     return;
3356 
3357   if (CGM.getLangOpts().OMPHostIRFile.empty())
3358     return;
3359 
3360   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3361   if (auto EC = Buf.getError()) {
3362     CGM.getDiags().Report(diag::err_cannot_open_file)
3363         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3364     return;
3365   }
3366 
3367   llvm::LLVMContext C;
3368   auto ME = expectedToErrorOrAndEmitErrors(
3369       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3370 
3371   if (auto EC = ME.getError()) {
3372     unsigned DiagID = CGM.getDiags().getCustomDiagID(
3373         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3374     CGM.getDiags().Report(DiagID)
3375         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3376     return;
3377   }
3378 
3379   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3380   if (!MD)
3381     return;
3382 
3383   for (llvm::MDNode *MN : MD->operands()) {
3384     auto &&GetMDInt = [MN](unsigned Idx) {
3385       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3386       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3387     };
3388 
3389     auto &&GetMDString = [MN](unsigned Idx) {
3390       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3391       return V->getString();
3392     };
3393 
3394     switch (GetMDInt(0)) {
3395     default:
3396       llvm_unreachable("Unexpected metadata!");
3397       break;
3398     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3399         OffloadingEntryInfoTargetRegion:
3400       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3401           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3402           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3403           /*Order=*/GetMDInt(5));
3404       break;
3405     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3406         OffloadingEntryInfoDeviceGlobalVar:
3407       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3408           /*MangledName=*/GetMDString(1),
3409           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3410               /*Flags=*/GetMDInt(2)),
3411           /*Order=*/GetMDInt(3));
3412       break;
3413     }
3414   }
3415 }
3416 
3417 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3418   if (!KmpRoutineEntryPtrTy) {
3419     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3420     ASTContext &C = CGM.getContext();
3421     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3422     FunctionProtoType::ExtProtoInfo EPI;
3423     KmpRoutineEntryPtrQTy = C.getPointerType(
3424         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3425     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3426   }
3427 }
3428 
3429 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3430   // Make sure the type of the entry is already created. This is the type we
3431   // have to create:
3432   // struct __tgt_offload_entry{
3433   //   void      *addr;       // Pointer to the offload entry info.
3434   //                          // (function or global)
3435   //   char      *name;       // Name of the function or global.
3436   //   size_t     size;       // Size of the entry info (0 if it a function).
3437   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3438   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3439   // };
3440   if (TgtOffloadEntryQTy.isNull()) {
3441     ASTContext &C = CGM.getContext();
3442     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3443     RD->startDefinition();
3444     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3445     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3446     addFieldToRecordDecl(C, RD, C.getSizeType());
3447     addFieldToRecordDecl(
3448         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3449     addFieldToRecordDecl(
3450         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3451     RD->completeDefinition();
3452     RD->addAttr(PackedAttr::CreateImplicit(C));
3453     TgtOffloadEntryQTy = C.getRecordType(RD);
3454   }
3455   return TgtOffloadEntryQTy;
3456 }
3457 
3458 namespace {
3459 struct PrivateHelpersTy {
3460   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3461                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3462       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3463         PrivateElemInit(PrivateElemInit) {}
3464   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3465   const Expr *OriginalRef = nullptr;
3466   const VarDecl *Original = nullptr;
3467   const VarDecl *PrivateCopy = nullptr;
3468   const VarDecl *PrivateElemInit = nullptr;
3469   bool isLocalPrivate() const {
3470     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3471   }
3472 };
3473 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3474 } // anonymous namespace
3475 
3476 static bool isAllocatableDecl(const VarDecl *VD) {
3477   const VarDecl *CVD = VD->getCanonicalDecl();
3478   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3479     return false;
3480   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3481   // Use the default allocation.
3482   return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
3483            !AA->getAllocator());
3484 }
3485 
3486 static RecordDecl *
3487 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3488   if (!Privates.empty()) {
3489     ASTContext &C = CGM.getContext();
3490     // Build struct .kmp_privates_t. {
3491     //         /*  private vars  */
3492     //       };
3493     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3494     RD->startDefinition();
3495     for (const auto &Pair : Privates) {
3496       const VarDecl *VD = Pair.second.Original;
3497       QualType Type = VD->getType().getNonReferenceType();
3498       // If the private variable is a local variable with lvalue ref type,
3499       // allocate the pointer instead of the pointee type.
3500       if (Pair.second.isLocalPrivate()) {
3501         if (VD->getType()->isLValueReferenceType())
3502           Type = C.getPointerType(Type);
3503         if (isAllocatableDecl(VD))
3504           Type = C.getPointerType(Type);
3505       }
3506       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3507       if (VD->hasAttrs()) {
3508         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3509              E(VD->getAttrs().end());
3510              I != E; ++I)
3511           FD->addAttr(*I);
3512       }
3513     }
3514     RD->completeDefinition();
3515     return RD;
3516   }
3517   return nullptr;
3518 }
3519 
3520 static RecordDecl *
3521 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3522                          QualType KmpInt32Ty,
3523                          QualType KmpRoutineEntryPointerQTy) {
3524   ASTContext &C = CGM.getContext();
3525   // Build struct kmp_task_t {
3526   //         void *              shareds;
3527   //         kmp_routine_entry_t routine;
3528   //         kmp_int32           part_id;
3529   //         kmp_cmplrdata_t data1;
3530   //         kmp_cmplrdata_t data2;
3531   // For taskloops additional fields:
3532   //         kmp_uint64          lb;
3533   //         kmp_uint64          ub;
3534   //         kmp_int64           st;
3535   //         kmp_int32           liter;
3536   //         void *              reductions;
3537   //       };
3538   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3539   UD->startDefinition();
3540   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3541   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3542   UD->completeDefinition();
3543   QualType KmpCmplrdataTy = C.getRecordType(UD);
3544   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3545   RD->startDefinition();
3546   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3547   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3548   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3549   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3550   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3551   if (isOpenMPTaskLoopDirective(Kind)) {
3552     QualType KmpUInt64Ty =
3553         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3554     QualType KmpInt64Ty =
3555         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3556     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3557     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3558     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3559     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3560     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3561   }
3562   RD->completeDefinition();
3563   return RD;
3564 }
3565 
3566 static RecordDecl *
3567 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3568                                      ArrayRef<PrivateDataTy> Privates) {
3569   ASTContext &C = CGM.getContext();
3570   // Build struct kmp_task_t_with_privates {
3571   //         kmp_task_t task_data;
3572   //         .kmp_privates_t. privates;
3573   //       };
3574   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3575   RD->startDefinition();
3576   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3577   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3578     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3579   RD->completeDefinition();
3580   return RD;
3581 }
3582 
3583 /// Emit a proxy function which accepts kmp_task_t as the second
3584 /// argument.
3585 /// \code
3586 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3587 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3588 ///   For taskloops:
3589 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3590 ///   tt->reductions, tt->shareds);
3591 ///   return 0;
3592 /// }
3593 /// \endcode
3594 static llvm::Function *
3595 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3596                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3597                       QualType KmpTaskTWithPrivatesPtrQTy,
3598                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3599                       QualType SharedsPtrTy, llvm::Function *TaskFunction,
3600                       llvm::Value *TaskPrivatesMap) {
3601   ASTContext &C = CGM.getContext();
3602   FunctionArgList Args;
3603   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3604                             ImplicitParamDecl::Other);
3605   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3606                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3607                                 ImplicitParamDecl::Other);
3608   Args.push_back(&GtidArg);
3609   Args.push_back(&TaskTypeArg);
3610   const auto &TaskEntryFnInfo =
3611       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3612   llvm::FunctionType *TaskEntryTy =
3613       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3614   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3615   auto *TaskEntry = llvm::Function::Create(
3616       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3617   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3618   TaskEntry->setDoesNotRecurse();
3619   CodeGenFunction CGF(CGM);
3620   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3621                     Loc, Loc);
3622 
3623   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3624   // tt,
3625   // For taskloops:
3626   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3627   // tt->task_data.shareds);
3628   llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3629       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3630   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3631       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3632       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3633   const auto *KmpTaskTWithPrivatesQTyRD =
3634       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3635   LValue Base =
3636       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3637   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3638   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3639   LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3640   llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3641 
3642   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3643   LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3644   llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3645       CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3646       CGF.ConvertTypeForMem(SharedsPtrTy));
3647 
3648   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3649   llvm::Value *PrivatesParam;
3650   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3651     LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3652     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3653         PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3654   } else {
3655     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3656   }
3657 
3658   llvm::Value *CommonArgs[] = {
3659       GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3660       CGF.Builder
3661           .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
3662                                                CGF.VoidPtrTy, CGF.Int8Ty)
3663           .getPointer()};
3664   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3665                                           std::end(CommonArgs));
3666   if (isOpenMPTaskLoopDirective(Kind)) {
3667     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3668     LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3669     llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3670     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3671     LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3672     llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3673     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3674     LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3675     llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3676     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3677     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3678     llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3679     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3680     LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3681     llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3682     CallArgs.push_back(LBParam);
3683     CallArgs.push_back(UBParam);
3684     CallArgs.push_back(StParam);
3685     CallArgs.push_back(LIParam);
3686     CallArgs.push_back(RParam);
3687   }
3688   CallArgs.push_back(SharedsParam);
3689 
3690   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3691                                                   CallArgs);
3692   CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3693                              CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3694   CGF.FinishFunction();
3695   return TaskEntry;
3696 }
3697 
3698 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3699                                             SourceLocation Loc,
3700                                             QualType KmpInt32Ty,
3701                                             QualType KmpTaskTWithPrivatesPtrQTy,
3702                                             QualType KmpTaskTWithPrivatesQTy) {
3703   ASTContext &C = CGM.getContext();
3704   FunctionArgList Args;
3705   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3706                             ImplicitParamDecl::Other);
3707   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3708                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3709                                 ImplicitParamDecl::Other);
3710   Args.push_back(&GtidArg);
3711   Args.push_back(&TaskTypeArg);
3712   const auto &DestructorFnInfo =
3713       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3714   llvm::FunctionType *DestructorFnTy =
3715       CGM.getTypes().GetFunctionType(DestructorFnInfo);
3716   std::string Name =
3717       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3718   auto *DestructorFn =
3719       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3720                              Name, &CGM.getModule());
3721   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3722                                     DestructorFnInfo);
3723   DestructorFn->setDoesNotRecurse();
3724   CodeGenFunction CGF(CGM);
3725   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3726                     Args, Loc, Loc);
3727 
3728   LValue Base = CGF.EmitLoadOfPointerLValue(
3729       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3730       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3731   const auto *KmpTaskTWithPrivatesQTyRD =
3732       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3733   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3734   Base = CGF.EmitLValueForField(Base, *FI);
3735   for (const auto *Field :
3736        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3737     if (QualType::DestructionKind DtorKind =
3738             Field->getType().isDestructedType()) {
3739       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3740       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3741     }
3742   }
3743   CGF.FinishFunction();
3744   return DestructorFn;
3745 }
3746 
3747 /// Emit a privates mapping function for correct handling of private and
3748 /// firstprivate variables.
3749 /// \code
3750 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3751 /// **noalias priv1,...,  <tyn> **noalias privn) {
3752 ///   *priv1 = &.privates.priv1;
3753 ///   ...;
3754 ///   *privn = &.privates.privn;
3755 /// }
3756 /// \endcode
3757 static llvm::Value *
3758 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3759                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3760                                ArrayRef<PrivateDataTy> Privates) {
3761   ASTContext &C = CGM.getContext();
3762   FunctionArgList Args;
3763   ImplicitParamDecl TaskPrivatesArg(
3764       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3765       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3766       ImplicitParamDecl::Other);
3767   Args.push_back(&TaskPrivatesArg);
3768   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3769   unsigned Counter = 1;
3770   for (const Expr *E : Data.PrivateVars) {
3771     Args.push_back(ImplicitParamDecl::Create(
3772         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3773         C.getPointerType(C.getPointerType(E->getType()))
3774             .withConst()
3775             .withRestrict(),
3776         ImplicitParamDecl::Other));
3777     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3778     PrivateVarsPos[VD] = Counter;
3779     ++Counter;
3780   }
3781   for (const Expr *E : Data.FirstprivateVars) {
3782     Args.push_back(ImplicitParamDecl::Create(
3783         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3784         C.getPointerType(C.getPointerType(E->getType()))
3785             .withConst()
3786             .withRestrict(),
3787         ImplicitParamDecl::Other));
3788     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3789     PrivateVarsPos[VD] = Counter;
3790     ++Counter;
3791   }
3792   for (const Expr *E : Data.LastprivateVars) {
3793     Args.push_back(ImplicitParamDecl::Create(
3794         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3795         C.getPointerType(C.getPointerType(E->getType()))
3796             .withConst()
3797             .withRestrict(),
3798         ImplicitParamDecl::Other));
3799     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3800     PrivateVarsPos[VD] = Counter;
3801     ++Counter;
3802   }
3803   for (const VarDecl *VD : Data.PrivateLocals) {
3804     QualType Ty = VD->getType().getNonReferenceType();
3805     if (VD->getType()->isLValueReferenceType())
3806       Ty = C.getPointerType(Ty);
3807     if (isAllocatableDecl(VD))
3808       Ty = C.getPointerType(Ty);
3809     Args.push_back(ImplicitParamDecl::Create(
3810         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3811         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3812         ImplicitParamDecl::Other));
3813     PrivateVarsPos[VD] = Counter;
3814     ++Counter;
3815   }
3816   const auto &TaskPrivatesMapFnInfo =
3817       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3818   llvm::FunctionType *TaskPrivatesMapTy =
3819       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3820   std::string Name =
3821       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3822   auto *TaskPrivatesMap = llvm::Function::Create(
3823       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3824       &CGM.getModule());
3825   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3826                                     TaskPrivatesMapFnInfo);
3827   if (CGM.getLangOpts().Optimize) {
3828     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3829     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3830     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3831   }
3832   CodeGenFunction CGF(CGM);
3833   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3834                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3835 
3836   // *privi = &.privates.privi;
3837   LValue Base = CGF.EmitLoadOfPointerLValue(
3838       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3839       TaskPrivatesArg.getType()->castAs<PointerType>());
3840   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3841   Counter = 0;
3842   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3843     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3844     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3845     LValue RefLVal =
3846         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3847     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3848         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3849     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3850     ++Counter;
3851   }
3852   CGF.FinishFunction();
3853   return TaskPrivatesMap;
3854 }
3855 
/// Emit initialization for private variables in task-based directives.
///
/// Walks \p Privates in lockstep with the fields of the privates record (the
/// second field of \p KmpTaskTWithPrivatesQTyRD) and emits the initializer of
/// each private copy into the matching field reached through \p TDBase.
/// Entries for which isLocalPrivate() holds are skipped.
///
/// \param D Directive whose task or taskloop captured statement provides the
/// capture information used while emitting the initializers.
/// \param KmpTaskSharedsPtr Address of the shareds record; only used when a
/// source lvalue for the shareds is built (see the condition on SrcBase).
/// \param SharedsTy Type of the shareds record.
/// \param SharedsPtrTy Pointer type the shareds address is cast to.
/// \param ForDup When true, only initializers that are non-trivial
/// CXXConstructExprs are emitted, and the original values of copies that have
/// an element initializer are read from the shareds record.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of the task-with-privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  // Pick the captured statement of the taskloop or the task region.
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  // Stays default-constructed unless the condition below holds.
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  // From here on FI iterates over the fields of the privates record itself and
  // must be advanced exactly once per entry of Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // Without ForDup every initializer is emitted; with ForDup only
    // non-trivial constructor calls are.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // An element initializer means the copy is initialized from the
      // original variable, so its address has to be located first.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          // Not captured: the original is addressed as a local variable.
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the original through the shareds record, using the declared
          // alignment of the original variable.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress(CGF).withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          // Lambda-captured variables and code inside a block: emit the
          // original reference directly.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          // The region object only lives for the EmitLValue call below.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  // Map the element helper variable to the source element.
                  InitScope.addPrivate(Elem, SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array: map the element helper variable to the original's
          // address and emit the initializer into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // No element initializer: the initializer does not refer to the
        // original variable through a helper, so emit it as is.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3975 
3976 /// Check if duplication function is required for taskloops.
3977 static bool checkInitIsRequired(CodeGenFunction &CGF,
3978                                 ArrayRef<PrivateDataTy> Privates) {
3979   bool InitRequired = false;
3980   for (const PrivateDataTy &Pair : Privates) {
3981     if (Pair.second.isLocalPrivate())
3982       continue;
3983     const VarDecl *VD = Pair.second.PrivateCopy;
3984     const Expr *Init = VD->getAnyInitializer();
3985     InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3986                                     !CGF.isTrivialInitializer(Init));
3987     if (InitRequired)
3988       break;
3989   }
3990   return InitRequired;
3991 }
3992 
3993 
3994 /// Emit task_dup function (for initialization of
3995 /// private/firstprivate/lastprivate vars and last_iter flag)
3996 /// \code
3997 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3998 /// lastpriv) {
3999 /// // setup lastprivate flag
4000 ///    task_dst->last = lastpriv;
4001 /// // could be constructor calls here...
4002 /// }
4003 /// \endcode
4004 static llvm::Value *
4005 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
4006                     const OMPExecutableDirective &D,
4007                     QualType KmpTaskTWithPrivatesPtrQTy,
4008                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4009                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
4010                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4011                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4012   ASTContext &C = CGM.getContext();
4013   FunctionArgList Args;
4014   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4015                            KmpTaskTWithPrivatesPtrQTy,
4016                            ImplicitParamDecl::Other);
4017   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4018                            KmpTaskTWithPrivatesPtrQTy,
4019                            ImplicitParamDecl::Other);
4020   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4021                                 ImplicitParamDecl::Other);
4022   Args.push_back(&DstArg);
4023   Args.push_back(&SrcArg);
4024   Args.push_back(&LastprivArg);
4025   const auto &TaskDupFnInfo =
4026       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4027   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4028   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
4029   auto *TaskDup = llvm::Function::Create(
4030       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4031   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4032   TaskDup->setDoesNotRecurse();
4033   CodeGenFunction CGF(CGM);
4034   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4035                     Loc);
4036 
4037   LValue TDBase = CGF.EmitLoadOfPointerLValue(
4038       CGF.GetAddrOfLocalVar(&DstArg),
4039       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4040   // task_dst->liter = lastpriv;
4041   if (WithLastIter) {
4042     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4043     LValue Base = CGF.EmitLValueForField(
4044         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4045     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4046     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4047         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4048     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4049   }
4050 
4051   // Emit initial values for private copies (if any).
4052   assert(!Privates.empty());
4053   Address KmpTaskSharedsPtr = Address::invalid();
4054   if (!Data.FirstprivateVars.empty()) {
4055     LValue TDBase = CGF.EmitLoadOfPointerLValue(
4056         CGF.GetAddrOfLocalVar(&SrcArg),
4057         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4058     LValue Base = CGF.EmitLValueForField(
4059         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4060     KmpTaskSharedsPtr = Address(
4061         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4062                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
4063                                                   KmpTaskTShareds)),
4064                              Loc),
4065         CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
4066   }
4067   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4068                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4069   CGF.FinishFunction();
4070   return TaskDup;
4071 }
4072 
4073 /// Checks if destructor function is required to be generated.
4074 /// \return true if cleanups are required, false otherwise.
4075 static bool
4076 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4077                          ArrayRef<PrivateDataTy> Privates) {
4078   for (const PrivateDataTy &P : Privates) {
4079     if (P.second.isLocalPrivate())
4080       continue;
4081     QualType Ty = P.second.Original->getType().getNonReferenceType();
4082     if (Ty.isDestructedType())
4083       return true;
4084   }
4085   return false;
4086 }
4087 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// The constructor emits, for every iterator of the expression, a counter
/// initialized to zero, an "iter.cont" block that compares the counter with
/// the iterator's upper bound, and an "iter.body" block that evaluates the
/// iterator update expression. The destructor closes the loops in reverse
/// order by emitting the counter update, a branch back to "iter.cont" and the
/// "iter.exit" block. Code emitted by the user between construction and
/// destruction therefore ends up in the innermost "iter.body".
/// If the iterator expression is null, neither the constructor nor the
/// destructor emits anything.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  /// Per-iterator "iter.cont" jump destinations (loop condition blocks).
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  /// Per-iterator "iter.exit" jump destinations (loop exit blocks).
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// Opens the loop nest for iterator expression \p E (may be null).
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // Evaluate all upper bounds up front and register private storage for
    // each iterator variable and its helper counter.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    // Emit the loop headers, outermost iterator first.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Signedness of the comparison follows the counter variable's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Closes the loop nest opened by the constructor, innermost loop first.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      // Only the outermost exit block (I == 1) is emitted as finished.
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4163 
4164 static std::pair<llvm::Value *, llvm::Value *>
4165 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4166   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4167   llvm::Value *Addr;
4168   if (OASE) {
4169     const Expr *Base = OASE->getBase();
4170     Addr = CGF.EmitScalarExpr(Base);
4171   } else {
4172     Addr = CGF.EmitLValue(E).getPointer(CGF);
4173   }
4174   llvm::Value *SizeVal;
4175   QualType Ty = E->getType();
4176   if (OASE) {
4177     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4178     for (const Expr *SE : OASE->getDimensions()) {
4179       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4180       Sz = CGF.EmitScalarConversion(
4181           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4182       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4183     }
4184   } else if (const auto *ASE =
4185                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4186     LValue UpAddrLVal =
4187         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4188     Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
4189     llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
4190         UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
4191     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4192     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4193     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4194   } else {
4195     SizeVal = CGF.getTypeSize(Ty);
4196   }
4197   return std::make_pair(Addr, SizeVal);
4198 }
4199 
4200 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4201 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4202   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4203   if (KmpTaskAffinityInfoTy.isNull()) {
4204     RecordDecl *KmpAffinityInfoRD =
4205         C.buildImplicitRecord("kmp_task_affinity_info_t");
4206     KmpAffinityInfoRD->startDefinition();
4207     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4208     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4209     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4210     KmpAffinityInfoRD->completeDefinition();
4211     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4212   }
4213 }
4214 
/// Emits allocation and initialization of the task descriptor for directive
/// \p D.
///
/// In order, this function:
///  - collects private, firstprivate, lastprivate and local private variables
///    from \p Data and sorts them by decreasing alignment;
///  - builds (or reuses) the kmp_task_t type and the task-specific record
///    that wraps it together with the privates;
///  - emits the privates mapping function and the proxy task entry;
///  - emits the call to __kmpc_omp_task_alloc, or to
///    __kmpc_omp_target_task_alloc when \p D has a nowait clause;
///  - handles the detach and affinity clauses;
///  - copies the shareds, initializes the privates and, for taskloop
///    directives that need it, emits the task duplication function;
///  - stores the destructors function and the priority in the descriptor.
///
/// \param TaskFunction Outlined task function; the type of its 4th parameter
/// is used as the type of the privates mapping function pointer.
/// \param SharedsTy Type of the record with the shared variables.
/// \param Shareds Address of the record with the shared variables.
/// \return Result with NewTask, TaskEntry, NewTaskNewTaskTTy, TDBase and
/// KmpTaskTQTyRD set, and TaskDupFn set only if a dup function was emitted.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  // PrivateVars and PrivateCopies are walked in lockstep.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // FirstprivateVars, FirstprivateCopies and FirstprivateInits are walked in
  // lockstep; only firstprivates carry an element-init variable.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Local privates: allocatable declarations are recorded with pointer
  // alignment, all others with the alignment of the declaration.
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Decreasing alignment; the stable sort keeps the collection order of
  // entries with equal alignment.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop directives and all other directives use separately cached
  // kmp_task_t record types.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates map is passed as the 4th argument of the task function, so
  // its pointer type is taken from that parameter.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    // Second field of the wrapper record is used as the privates type.
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  // Compile-time known part of the flags.
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The final flag is selected at run time when Data.Final holds a value,
  // otherwise it is folded from the integer part of Data.Final.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // With a nowait clause the target task allocation entry point is used; it
  // receives the device id as an additional trailing argument.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    // Note: this local `Loc` (an llvm::Value) shadows the SourceLocation
    // parameter `Loc` for the rest of this block.
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    // Note: `C` is re-declared here and is shadowed again by the clause
    // variable inside the range-for loops below.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // NumOfElements is the run-time part (from iterator upper bounds),
    // NumAffinities the compile-time part (items of clauses w/o iterators).
    // NOTE(review): the run-time value is multiplied across all iterators of
    // all clauses with a modifier and is not scaled by the size of the
    // clause's variable list - confirm this matches the number of entries
    // written by the iterator loop below.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // At least one iterator: the array size is only known at run time, so
      // a variable-length array is emitted.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      // The runtime call below takes the element count as a 32-bit value.
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // No iterators: a constant-size temporary array is sufficient.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    // Only the base address and length fields are stored; the flags field is
    // not written by this function.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Elements produced under iterators are written at a run-time position,
    // kept in a temporary that starts right after the constant-index part.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      // The scope object opens the iterator loops here and closes them when
      // it is destroyed at the end of this loop iteration.
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        // ++position;
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  // View the allocated task as the task-specific record: `Base` is the whole
  // wrapper record, `TDBase` its first field (the kmp_task_t part).
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    // Load the shareds pointer stored in the task and copy the captured
    // record into the memory it points to.
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            CGF.EmitLValueForField(
                TDBase,
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // A task duplication function is only emitted for taskloop directives
    // that have lastprivates or privates requiring initialization.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  // The union declaration is taken from the type of the Data1 field and is
  // also used below to index into the Data2 field.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  // The integer part of Data.Priority tells whether a priority is present,
  // the pointer part holds the value to store.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4602 
namespace {
/// Dependence kinds as passed to the runtime library in the flags field of a
/// dependence record (see translateDependencyKind for the clause mapping).
enum RTLDependenceKindTy {
  /// 'in' dependence.
  DepIn = 0x1,
  /// 'out' and 'inout' dependences share this code.
  DepInOut = 0x3,
  /// 'mutexinoutset' dependence.
  DepMutexInOutSet = 0x4,
  /// 'inoutset' dependence.
  DepInOutSet = 0x8
};
/// Indices of the fields in the kmp_depend_info record, in declaration order.
enum RTLDependInfoFieldsTy { BaseAddr = 0, Len = 1, Flags = 2 };
} // namespace
4614 
4615 /// Translates internal dependency kind into the runtime kind.
4616 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4617   RTLDependenceKindTy DepKind;
4618   switch (K) {
4619   case OMPC_DEPEND_in:
4620     DepKind = DepIn;
4621     break;
4622   // Out and InOut dependencies must use the same code.
4623   case OMPC_DEPEND_out:
4624   case OMPC_DEPEND_inout:
4625     DepKind = DepInOut;
4626     break;
4627   case OMPC_DEPEND_mutexinoutset:
4628     DepKind = DepMutexInOutSet;
4629     break;
4630   case OMPC_DEPEND_inoutset:
4631     DepKind = DepInOutSet;
4632     break;
4633   case OMPC_DEPEND_source:
4634   case OMPC_DEPEND_sink:
4635   case OMPC_DEPEND_depobj:
4636   case OMPC_DEPEND_unknown:
4637     llvm_unreachable("Unknown task dependence type");
4638   }
4639   return DepKind;
4640 }
4641 
4642 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4643 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4644                            QualType &FlagsTy) {
4645   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4646   if (KmpDependInfoTy.isNull()) {
4647     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4648     KmpDependInfoRD->startDefinition();
4649     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4650     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4651     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4652     KmpDependInfoRD->completeDefinition();
4653     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4654   }
4655 }
4656 
4657 std::pair<llvm::Value *, LValue>
4658 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4659                                    SourceLocation Loc) {
4660   ASTContext &C = CGM.getContext();
4661   QualType FlagsTy;
4662   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4663   RecordDecl *KmpDependInfoRD =
4664       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4665   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4666   LValue Base = CGF.EmitLoadOfPointerLValue(
4667       CGF.Builder.CreateElementBitCast(
4668           DepobjLVal.getAddress(CGF),
4669           CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4670       KmpDependInfoPtrTy->castAs<PointerType>());
4671   Address DepObjAddr = CGF.Builder.CreateGEP(
4672       Base.getAddress(CGF),
4673       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4674   LValue NumDepsBase = CGF.MakeAddrLValue(
4675       DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4676   // NumDeps = deps[i].base_addr;
4677   LValue BaseAddrLVal = CGF.EmitLValueForField(
4678       NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4679   llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4680   return std::make_pair(NumDeps, Base);
4681 }
4682 
4683 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4684                            llvm::PointerUnion<unsigned *, LValue *> Pos,
4685                            const OMPTaskDataTy::DependData &Data,
4686                            Address DependenciesArray) {
4687   CodeGenModule &CGM = CGF.CGM;
4688   ASTContext &C = CGM.getContext();
4689   QualType FlagsTy;
4690   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4691   RecordDecl *KmpDependInfoRD =
4692       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4693   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4694 
4695   OMPIteratorGeneratorScope IteratorScope(
4696       CGF, cast_or_null<OMPIteratorExpr>(
4697                Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4698                                  : nullptr));
4699   for (const Expr *E : Data.DepExprs) {
4700     llvm::Value *Addr;
4701     llvm::Value *Size;
4702     std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4703     LValue Base;
4704     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4705       Base = CGF.MakeAddrLValue(
4706           CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4707     } else {
4708       LValue &PosLVal = *Pos.get<LValue *>();
4709       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4710       Base = CGF.MakeAddrLValue(
4711           CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
4712     }
4713     // deps[i].base_addr = &<Dependencies[i].second>;
4714     LValue BaseAddrLVal = CGF.EmitLValueForField(
4715         Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4716     CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4717                           BaseAddrLVal);
4718     // deps[i].len = sizeof(<Dependencies[i].second>);
4719     LValue LenLVal = CGF.EmitLValueForField(
4720         Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4721     CGF.EmitStoreOfScalar(Size, LenLVal);
4722     // deps[i].flags = <Dependencies[i].first>;
4723     RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4724     LValue FlagsLVal = CGF.EmitLValueForField(
4725         Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4726     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4727                           FlagsLVal);
4728     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4729       ++(*P);
4730     } else {
4731       LValue &PosLVal = *Pos.get<LValue *>();
4732       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4733       Idx = CGF.Builder.CreateNUWAdd(Idx,
4734                                      llvm::ConstantInt::get(Idx->getType(), 1));
4735       CGF.EmitStoreOfScalar(Idx, PosLVal);
4736     }
4737   }
4738 }
4739 
4740 SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
4741     CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4742     const OMPTaskDataTy::DependData &Data) {
4743   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4744          "Expected depobj dependecy kind.");
4745   SmallVector<llvm::Value *, 4> Sizes;
4746   SmallVector<LValue, 4> SizeLVals;
4747   ASTContext &C = CGF.getContext();
4748   {
4749     OMPIteratorGeneratorScope IteratorScope(
4750         CGF, cast_or_null<OMPIteratorExpr>(
4751                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4752                                    : nullptr));
4753     for (const Expr *E : Data.DepExprs) {
4754       llvm::Value *NumDeps;
4755       LValue Base;
4756       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4757       std::tie(NumDeps, Base) =
4758           getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4759       LValue NumLVal = CGF.MakeAddrLValue(
4760           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4761           C.getUIntPtrType());
4762       CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4763                               NumLVal.getAddress(CGF));
4764       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4765       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4766       CGF.EmitStoreOfScalar(Add, NumLVal);
4767       SizeLVals.push_back(NumLVal);
4768     }
4769   }
4770   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4771     llvm::Value *Size =
4772         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4773     Sizes.push_back(Size);
4774   }
4775   return Sizes;
4776 }
4777 
4778 void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
4779                                          QualType &KmpDependInfoTy,
4780                                          LValue PosLVal,
4781                                          const OMPTaskDataTy::DependData &Data,
4782                                          Address DependenciesArray) {
4783   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4784          "Expected depobj dependecy kind.");
4785   llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4786   {
4787     OMPIteratorGeneratorScope IteratorScope(
4788         CGF, cast_or_null<OMPIteratorExpr>(
4789                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4790                                    : nullptr));
4791     for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4792       const Expr *E = Data.DepExprs[I];
4793       llvm::Value *NumDeps;
4794       LValue Base;
4795       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4796       std::tie(NumDeps, Base) =
4797           getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4798 
4799       // memcopy dependency data.
4800       llvm::Value *Size = CGF.Builder.CreateNUWMul(
4801           ElSize,
4802           CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4803       llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4804       Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
4805       CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4806 
4807       // Increase pos.
4808       // pos += size;
4809       llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4810       CGF.EmitStoreOfScalar(Add, PosLVal);
4811     }
4812   }
4813 }
4814 
4815 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4816     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4817     SourceLocation Loc) {
4818   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4819         return D.DepExprs.empty();
4820       }))
4821     return std::make_pair(nullptr, Address::invalid());
4822   // Process list of dependencies.
4823   ASTContext &C = CGM.getContext();
4824   Address DependenciesArray = Address::invalid();
4825   llvm::Value *NumOfElements = nullptr;
4826   unsigned NumDependencies = std::accumulate(
4827       Dependencies.begin(), Dependencies.end(), 0,
4828       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4829         return D.DepKind == OMPC_DEPEND_depobj
4830                    ? V
4831                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4832       });
4833   QualType FlagsTy;
4834   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4835   bool HasDepobjDeps = false;
4836   bool HasRegularWithIterators = false;
4837   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4838   llvm::Value *NumOfRegularWithIterators =
4839       llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4840   // Calculate number of depobj dependecies and regular deps with the iterators.
4841   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4842     if (D.DepKind == OMPC_DEPEND_depobj) {
4843       SmallVector<llvm::Value *, 4> Sizes =
4844           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4845       for (llvm::Value *Size : Sizes) {
4846         NumOfDepobjElements =
4847             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4848       }
4849       HasDepobjDeps = true;
4850       continue;
4851     }
4852     // Include number of iterations, if any.
4853 
4854     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4855       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4856         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4857         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4858         llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4859             Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4860         NumOfRegularWithIterators =
4861             CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4862       }
4863       HasRegularWithIterators = true;
4864       continue;
4865     }
4866   }
4867 
4868   QualType KmpDependInfoArrayTy;
4869   if (HasDepobjDeps || HasRegularWithIterators) {
4870     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4871                                            /*isSigned=*/false);
4872     if (HasDepobjDeps) {
4873       NumOfElements =
4874           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4875     }
4876     if (HasRegularWithIterators) {
4877       NumOfElements =
4878           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4879     }
4880     auto *OVE = new (C) OpaqueValueExpr(
4881         Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4882         VK_PRValue);
4883     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4884                                                   RValue::get(NumOfElements));
4885     KmpDependInfoArrayTy =
4886         C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
4887                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4888     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4889     // Properly emit variable-sized array.
4890     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4891                                          ImplicitParamDecl::Other);
4892     CGF.EmitVarDecl(*PD);
4893     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4894     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4895                                               /*isSigned=*/false);
4896   } else {
4897     KmpDependInfoArrayTy = C.getConstantArrayType(
4898         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4899         ArrayType::Normal, /*IndexTypeQuals=*/0);
4900     DependenciesArray =
4901         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4902     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4903     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4904                                            /*isSigned=*/false);
4905   }
4906   unsigned Pos = 0;
4907   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4908     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4909         Dependencies[I].IteratorExpr)
4910       continue;
4911     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4912                    DependenciesArray);
4913   }
4914   // Copy regular dependecies with iterators.
4915   LValue PosLVal = CGF.MakeAddrLValue(
4916       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4917   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4918   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4919     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4920         !Dependencies[I].IteratorExpr)
4921       continue;
4922     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4923                    DependenciesArray);
4924   }
4925   // Copy final depobj arrays without iterators.
4926   if (HasDepobjDeps) {
4927     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4928       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4929         continue;
4930       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4931                          DependenciesArray);
4932     }
4933   }
4934   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4935       DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4936   return std::make_pair(NumOfElements, DependenciesArray);
4937 }
4938 
4939 Address CGOpenMPRuntime::emitDepobjDependClause(
4940     CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4941     SourceLocation Loc) {
4942   if (Dependencies.DepExprs.empty())
4943     return Address::invalid();
4944   // Process list of dependencies.
4945   ASTContext &C = CGM.getContext();
4946   Address DependenciesArray = Address::invalid();
4947   unsigned NumDependencies = Dependencies.DepExprs.size();
4948   QualType FlagsTy;
4949   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4950   RecordDecl *KmpDependInfoRD =
4951       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4952 
4953   llvm::Value *Size;
4954   // Define type kmp_depend_info[<Dependencies.size()>];
4955   // For depobj reserve one extra element to store the number of elements.
4956   // It is required to handle depobj(x) update(in) construct.
4957   // kmp_depend_info[<Dependencies.size()>] deps;
4958   llvm::Value *NumDepsVal;
4959   CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4960   if (const auto *IE =
4961           cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4962     NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4963     for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4964       llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4965       Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4966       NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4967     }
4968     Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4969                                     NumDepsVal);
4970     CharUnits SizeInBytes =
4971         C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4972     llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4973     Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4974     NumDepsVal =
4975         CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4976   } else {
4977     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4978         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4979         nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
4980     CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4981     Size = CGM.getSize(Sz.alignTo(Align));
4982     NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4983   }
4984   // Need to allocate on the dynamic memory.
4985   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4986   // Use default allocator.
4987   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4988   llvm::Value *Args[] = {ThreadID, Size, Allocator};
4989 
4990   llvm::Value *Addr =
4991       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4992                               CGM.getModule(), OMPRTL___kmpc_alloc),
4993                           Args, ".dep.arr.addr");
4994   llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4995   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4996       Addr, KmpDependInfoLlvmTy->getPointerTo());
4997   DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4998   // Write number of elements in the first element of array for depobj.
4999   LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
5000   // deps[i].base_addr = NumDependencies;
5001   LValue BaseAddrLVal = CGF.EmitLValueForField(
5002       Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5003   CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
5004   llvm::PointerUnion<unsigned *, LValue *> Pos;
5005   unsigned Idx = 1;
5006   LValue PosLVal;
5007   if (Dependencies.IteratorExpr) {
5008     PosLVal = CGF.MakeAddrLValue(
5009         CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
5010         C.getSizeType());
5011     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
5012                           /*IsInit=*/true);
5013     Pos = &PosLVal;
5014   } else {
5015     Pos = &Idx;
5016   }
5017   emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
5018   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5019       CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
5020       CGF.Int8Ty);
5021   return DependenciesArray;
5022 }
5023 
5024 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5025                                         SourceLocation Loc) {
5026   ASTContext &C = CGM.getContext();
5027   QualType FlagsTy;
5028   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5029   LValue Base = CGF.EmitLoadOfPointerLValue(
5030       DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
5031   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5032   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5033       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
5034       CGF.ConvertTypeForMem(KmpDependInfoTy));
5035   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5036       Addr.getElementType(), Addr.getPointer(),
5037       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5038   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5039                                                                CGF.VoidPtrTy);
5040   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5041   // Use default allocator.
5042   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5043   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5044 
5045   // _kmpc_free(gtid, addr, nullptr);
5046   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5047                                 CGM.getModule(), OMPRTL___kmpc_free),
5048                             Args);
5049 }
5050 
5051 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
5052                                        OpenMPDependClauseKind NewDepKind,
5053                                        SourceLocation Loc) {
5054   ASTContext &C = CGM.getContext();
5055   QualType FlagsTy;
5056   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5057   RecordDecl *KmpDependInfoRD =
5058       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5059   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5060   llvm::Value *NumDeps;
5061   LValue Base;
5062   std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
5063 
5064   Address Begin = Base.getAddress(CGF);
5065   // Cast from pointer to array type to pointer to single element.
5066   llvm::Value *End = CGF.Builder.CreateGEP(
5067       Begin.getElementType(), Begin.getPointer(), NumDeps);
5068   // The basic structure here is a while-do loop.
5069   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
5070   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
5071   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5072   CGF.EmitBlock(BodyBB);
5073   llvm::PHINode *ElementPHI =
5074       CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
5075   ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
5076   Begin = Begin.withPointer(ElementPHI);
5077   Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
5078                             Base.getTBAAInfo());
5079   // deps[i].flags = NewDepKind;
5080   RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
5081   LValue FlagsLVal = CGF.EmitLValueForField(
5082       Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5083   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5084                         FlagsLVal);
5085 
5086   // Shift the address forward by one element.
5087   Address ElementNext =
5088       CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
5089   ElementPHI->addIncoming(ElementNext.getPointer(),
5090                           CGF.Builder.GetInsertBlock());
5091   llvm::Value *IsEmpty =
5092       CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
5093   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5094   // Done.
5095   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5096 }
5097 
5098 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5099                                    const OMPExecutableDirective &D,
5100                                    llvm::Function *TaskFunction,
5101                                    QualType SharedsTy, Address Shareds,
5102                                    const Expr *IfCond,
5103                                    const OMPTaskDataTy &Data) {
5104   if (!CGF.HaveInsertPoint())
5105     return;
5106 
5107   TaskResultTy Result =
5108       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5109   llvm::Value *NewTask = Result.NewTask;
5110   llvm::Function *TaskEntry = Result.TaskEntry;
5111   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5112   LValue TDBase = Result.TDBase;
5113   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5114   // Process list of dependences.
5115   Address DependenciesArray = Address::invalid();
5116   llvm::Value *NumOfElements;
5117   std::tie(NumOfElements, DependenciesArray) =
5118       emitDependClause(CGF, Data.Dependences, Loc);
5119 
5120   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5121   // libcall.
5122   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5123   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5124   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5125   // list is not empty
5126   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5127   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5128   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5129   llvm::Value *DepTaskArgs[7];
5130   if (!Data.Dependences.empty()) {
5131     DepTaskArgs[0] = UpLoc;
5132     DepTaskArgs[1] = ThreadID;
5133     DepTaskArgs[2] = NewTask;
5134     DepTaskArgs[3] = NumOfElements;
5135     DepTaskArgs[4] = DependenciesArray.getPointer();
5136     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5137     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5138   }
5139   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
5140                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5141     if (!Data.Tied) {
5142       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5143       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5144       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5145     }
5146     if (!Data.Dependences.empty()) {
5147       CGF.EmitRuntimeCall(
5148           OMPBuilder.getOrCreateRuntimeFunction(
5149               CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
5150           DepTaskArgs);
5151     } else {
5152       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5153                               CGM.getModule(), OMPRTL___kmpc_omp_task),
5154                           TaskArgs);
5155     }
5156     // Check if parent region is untied and build return for untied task;
5157     if (auto *Region =
5158             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5159       Region->emitUntiedSwitch(CGF);
5160   };
5161 
5162   llvm::Value *DepWaitTaskArgs[6];
5163   if (!Data.Dependences.empty()) {
5164     DepWaitTaskArgs[0] = UpLoc;
5165     DepWaitTaskArgs[1] = ThreadID;
5166     DepWaitTaskArgs[2] = NumOfElements;
5167     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5168     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5169     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5170   }
5171   auto &M = CGM.getModule();
5172   auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
5173                         TaskEntry, &Data, &DepWaitTaskArgs,
5174                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5175     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5176     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5177     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5178     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5179     // is specified.
5180     if (!Data.Dependences.empty())
5181       CGF.EmitRuntimeCall(
5182           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
5183           DepWaitTaskArgs);
5184     // Call proxy_task_entry(gtid, new_task);
5185     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5186                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5187       Action.Enter(CGF);
5188       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5189       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5190                                                           OutlinedFnArgs);
5191     };
5192 
5193     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5194     // kmp_task_t *new_task);
5195     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5196     // kmp_task_t *new_task);
5197     RegionCodeGenTy RCG(CodeGen);
5198     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
5199                               M, OMPRTL___kmpc_omp_task_begin_if0),
5200                           TaskArgs,
5201                           OMPBuilder.getOrCreateRuntimeFunction(
5202                               M, OMPRTL___kmpc_omp_task_complete_if0),
5203                           TaskArgs);
5204     RCG.setAction(Action);
5205     RCG(CGF);
5206   };
5207 
5208   if (IfCond) {
5209     emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5210   } else {
5211     RegionCodeGenTy ThenRCG(ThenCodeGen);
5212     ThenRCG(CGF);
5213   }
5214 }
5215 
5216 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5217                                        const OMPLoopDirective &D,
5218                                        llvm::Function *TaskFunction,
5219                                        QualType SharedsTy, Address Shareds,
5220                                        const Expr *IfCond,
5221                                        const OMPTaskDataTy &Data) {
5222   if (!CGF.HaveInsertPoint())
5223     return;
5224   TaskResultTy Result =
5225       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5226   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5227   // libcall.
5228   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5229   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5230   // sched, kmp_uint64 grainsize, void *task_dup);
5231   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5232   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5233   llvm::Value *IfVal;
5234   if (IfCond) {
5235     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5236                                       /*isSigned=*/true);
5237   } else {
5238     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5239   }
5240 
5241   LValue LBLVal = CGF.EmitLValueForField(
5242       Result.TDBase,
5243       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5244   const auto *LBVar =
5245       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5246   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5247                        LBLVal.getQuals(),
5248                        /*IsInitializer=*/true);
5249   LValue UBLVal = CGF.EmitLValueForField(
5250       Result.TDBase,
5251       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5252   const auto *UBVar =
5253       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5254   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5255                        UBLVal.getQuals(),
5256                        /*IsInitializer=*/true);
5257   LValue StLVal = CGF.EmitLValueForField(
5258       Result.TDBase,
5259       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5260   const auto *StVar =
5261       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5262   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5263                        StLVal.getQuals(),
5264                        /*IsInitializer=*/true);
5265   // Store reductions address.
5266   LValue RedLVal = CGF.EmitLValueForField(
5267       Result.TDBase,
5268       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5269   if (Data.Reductions) {
5270     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5271   } else {
5272     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5273                                CGF.getContext().VoidPtrTy);
5274   }
5275   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5276   llvm::Value *TaskArgs[] = {
5277       UpLoc,
5278       ThreadID,
5279       Result.NewTask,
5280       IfVal,
5281       LBLVal.getPointer(CGF),
5282       UBLVal.getPointer(CGF),
5283       CGF.EmitLoadOfScalar(StLVal, Loc),
5284       llvm::ConstantInt::getSigned(
5285           CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5286       llvm::ConstantInt::getSigned(
5287           CGF.IntTy, Data.Schedule.getPointer()
5288                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5289                          : NoSchedule),
5290       Data.Schedule.getPointer()
5291           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5292                                       /*isSigned=*/false)
5293           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5294       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5295                              Result.TaskDupFn, CGF.VoidPtrTy)
5296                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5297   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5298                           CGM.getModule(), OMPRTL___kmpc_taskloop),
5299                       TaskArgs);
5300 }
5301 
5302 /// Emit reduction operation for each element of array (required for
5303 /// array sections) LHS op = RHS.
5304 /// \param Type Type of array.
5305 /// \param LHSVar Variable on the left side of the reduction operation
5306 /// (references element of array in original variable).
5307 /// \param RHSVar Variable on the right side of the reduction operation
5308 /// (references element of array in original variable).
5309 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5310 /// RHSVar.
5311 static void EmitOMPAggregateReduction(
5312     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5313     const VarDecl *RHSVar,
5314     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5315                                   const Expr *, const Expr *)> &RedOpGen,
5316     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5317     const Expr *UpExpr = nullptr) {
5318   // Perform element-by-element initialization.
5319   QualType ElementTy;
5320   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5321   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5322 
5323   // Drill down to the base element type on both arrays.
5324   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5325   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5326 
5327   llvm::Value *RHSBegin = RHSAddr.getPointer();
5328   llvm::Value *LHSBegin = LHSAddr.getPointer();
5329   // Cast from pointer to array type to pointer to single element.
5330   llvm::Value *LHSEnd =
5331       CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
5332   // The basic structure here is a while-do loop.
5333   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5334   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5335   llvm::Value *IsEmpty =
5336       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5337   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5338 
5339   // Enter the loop body, making that address the current address.
5340   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5341   CGF.EmitBlock(BodyBB);
5342 
5343   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5344 
5345   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5346       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5347   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5348   Address RHSElementCurrent(
5349       RHSElementPHI, RHSAddr.getElementType(),
5350       RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5351 
5352   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5353       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5354   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5355   Address LHSElementCurrent(
5356       LHSElementPHI, LHSAddr.getElementType(),
5357       LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5358 
5359   // Emit copy.
5360   CodeGenFunction::OMPPrivateScope Scope(CGF);
5361   Scope.addPrivate(LHSVar, LHSElementCurrent);
5362   Scope.addPrivate(RHSVar, RHSElementCurrent);
5363   Scope.Privatize();
5364   RedOpGen(CGF, XExpr, EExpr, UpExpr);
5365   Scope.ForceCleanup();
5366 
5367   // Shift the address forward by one element.
5368   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5369       LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
5370       "omp.arraycpy.dest.element");
5371   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5372       RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
5373       "omp.arraycpy.src.element");
5374   // Check whether we've reached the end.
5375   llvm::Value *Done =
5376       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5377   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5378   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5379   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5380 
5381   // Done.
5382   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5383 }
5384 
5385 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5386 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5387 /// UDR combiner function.
5388 static void emitReductionCombiner(CodeGenFunction &CGF,
5389                                   const Expr *ReductionOp) {
5390   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5391     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5392       if (const auto *DRE =
5393               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5394         if (const auto *DRD =
5395                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5396           std::pair<llvm::Function *, llvm::Function *> Reduction =
5397               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5398           RValue Func = RValue::get(Reduction.first);
5399           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5400           CGF.EmitIgnoredExpr(ReductionOp);
5401           return;
5402         }
5403   CGF.EmitIgnoredExpr(ReductionOp);
5404 }
5405 
5406 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5407     SourceLocation Loc, llvm::Type *ArgsElemType,
5408     ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
5409     ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
5410   ASTContext &C = CGM.getContext();
5411 
5412   // void reduction_func(void *LHSArg, void *RHSArg);
5413   FunctionArgList Args;
5414   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5415                            ImplicitParamDecl::Other);
5416   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5417                            ImplicitParamDecl::Other);
5418   Args.push_back(&LHSArg);
5419   Args.push_back(&RHSArg);
5420   const auto &CGFI =
5421       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5422   std::string Name = getName({"omp", "reduction", "reduction_func"});
5423   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5424                                     llvm::GlobalValue::InternalLinkage, Name,
5425                                     &CGM.getModule());
5426   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5427   Fn->setDoesNotRecurse();
5428   CodeGenFunction CGF(CGM);
5429   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5430 
5431   // Dst = (void*[n])(LHSArg);
5432   // Src = (void*[n])(RHSArg);
5433   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5434                   CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5435                   ArgsElemType->getPointerTo()),
5436               ArgsElemType, CGF.getPointerAlign());
5437   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5438                   CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5439                   ArgsElemType->getPointerTo()),
5440               ArgsElemType, CGF.getPointerAlign());
5441 
5442   //  ...
5443   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5444   //  ...
5445   CodeGenFunction::OMPPrivateScope Scope(CGF);
5446   const auto *IPriv = Privates.begin();
5447   unsigned Idx = 0;
5448   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5449     const auto *RHSVar =
5450         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5451     Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
5452     const auto *LHSVar =
5453         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5454     Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
5455     QualType PrivTy = (*IPriv)->getType();
5456     if (PrivTy->isVariablyModifiedType()) {
5457       // Get array size and emit VLA type.
5458       ++Idx;
5459       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5460       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5461       const VariableArrayType *VLA =
5462           CGF.getContext().getAsVariableArrayType(PrivTy);
5463       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5464       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5465           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5466       CGF.EmitVariablyModifiedType(PrivTy);
5467     }
5468   }
5469   Scope.Privatize();
5470   IPriv = Privates.begin();
5471   const auto *ILHS = LHSExprs.begin();
5472   const auto *IRHS = RHSExprs.begin();
5473   for (const Expr *E : ReductionOps) {
5474     if ((*IPriv)->getType()->isArrayType()) {
5475       // Emit reduction for array section.
5476       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5477       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5478       EmitOMPAggregateReduction(
5479           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5480           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5481             emitReductionCombiner(CGF, E);
5482           });
5483     } else {
5484       // Emit reduction for array subscript or single variable.
5485       emitReductionCombiner(CGF, E);
5486     }
5487     ++IPriv;
5488     ++ILHS;
5489     ++IRHS;
5490   }
5491   Scope.ForceCleanup();
5492   CGF.FinishFunction();
5493   return Fn;
5494 }
5495 
5496 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5497                                                   const Expr *ReductionOp,
5498                                                   const Expr *PrivateRef,
5499                                                   const DeclRefExpr *LHS,
5500                                                   const DeclRefExpr *RHS) {
5501   if (PrivateRef->getType()->isArrayType()) {
5502     // Emit reduction for array section.
5503     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5504     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5505     EmitOMPAggregateReduction(
5506         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5507         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5508           emitReductionCombiner(CGF, ReductionOp);
5509         });
5510   } else {
5511     // Emit reduction for array subscript or single variable.
5512     emitReductionCombiner(CGF, ReductionOp);
5513   }
5514 }
5515 
/// Emit the code that finalizes a reduction clause.
///
/// With Options.SimpleReduction set, only the combiner of every item is
/// emitted inline. Otherwise a list of item addresses is built, an outlined
/// reduce function is generated, __kmpc_reduce{_nowait} is called and a switch
/// over its result selects between the inline combiners (case 1) and an
/// atomic/critical update of every item (case 2); see the pseudo code inside
/// the function body.
///
/// \param CGF Function in which the code is emitted; nothing is emitted when
/// it has no insert point.
/// \param Loc Source location used for runtime calls and generated helpers.
/// \param Privates Private copies of the reduction items.
/// \param LHSExprs References to the LHS helper variables, one per item.
/// \param RHSExprs References to the RHS helper variables, one per item.
/// \param ReductionOps Combiner expressions, one per item.
/// \param Options Supplies the WithNowait and SimpleReduction flags.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime involvement: just combine every item in place.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  // Size is the number of slots in the list: one per item plus one extra slot
  // for every variably modified item.
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  // I counts reduction items, Idx counts slots of ReductionList.
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      // This Size (the element count as an IR value) shadows the slot count
      // declared above; the count is stored in the list encoded as a pointer.
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn =
      emitReductionFunction(Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
                            Privates, LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  // Note that <n> is the number of reduction items (RHSExprs.size()), whereas
  // sizeof(RedList) also covers the extra slots reserved for VLA sizes.
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  // Both cases branch to DefaultBB when done, so it doubles as the exit block.
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Region body for case 1: the plain combiner of every item.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // The action carries no enter callee (nullptr / no arguments) and the
  // matching end-reduce runtime function with EndArgs as its exit callee.
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  // Region body for case 2: every item is updated either with a simple atomic
  // update (when the combiner has the form `x = <update>`) or inside a
  // critical region (any other combiner).
  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      // Opcode handed to the atomic update; it stays BO_Comma unless a binary
      // operator is found in the update expression below. The pointer named
      // BO in the following `if` shadows this variable only inside that
      // statement.
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        // Emits the atomic update of one element. The callback passed to
        // EmitOMPAtomicSimpleUpdateExpr stores the supplied value of X into a
        // temporary, rebinds VD to that temporary and evaluates UpExpr.
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                CGF.emitOMPSimpleStore(
                    CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                    VD->getType().getNonReferenceType(), Loc);
                PrivateScope.addPrivate(VD, LHSTemp);
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Array section: one critical region per element.
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          // Array subscript or single variable: a single critical region.
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    // Action is local to this branch, so the region has to be emitted here
    // while the action is still alive.
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    // With nowait no end-reduce call is attached to case 2.
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5819 
5820 /// Generates unique name for artificial threadprivate variables.
5821 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5822 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5823                                       const Expr *Ref) {
5824   SmallString<256> Buffer;
5825   llvm::raw_svector_ostream Out(Buffer);
5826   const clang::DeclRefExpr *DE;
5827   const VarDecl *D = ::getBaseDecl(Ref, DE);
5828   if (!D)
5829     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5830   D = D->getCanonicalDecl();
5831   std::string Name = CGM.getOpenMPRuntime().getName(
5832       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5833   Out << Prefix << Name << "_"
5834       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5835   return std::string(Out.str());
5836 }
5837 
5838 /// Emits reduction initializer function:
5839 /// \code
5840 /// void @.red_init(void* %arg, void* %orig) {
5841 /// %0 = bitcast void* %arg to <type>*
5842 /// store <type> <init>, <type>* %0
5843 /// ret void
5844 /// }
5845 /// \endcode
5846 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5847                                            SourceLocation Loc,
5848                                            ReductionCodeGen &RCG, unsigned N) {
5849   ASTContext &C = CGM.getContext();
5850   QualType VoidPtrTy = C.VoidPtrTy;
5851   VoidPtrTy.addRestrict();
5852   FunctionArgList Args;
5853   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5854                           ImplicitParamDecl::Other);
5855   ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5856                               ImplicitParamDecl::Other);
5857   Args.emplace_back(&Param);
5858   Args.emplace_back(&ParamOrig);
5859   const auto &FnInfo =
5860       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5861   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5862   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5863   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5864                                     Name, &CGM.getModule());
5865   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5866   Fn->setDoesNotRecurse();
5867   CodeGenFunction CGF(CGM);
5868   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5869   QualType PrivateType = RCG.getPrivateType(N);
5870   Address PrivateAddr = CGF.EmitLoadOfPointer(
5871       CGF.Builder.CreateElementBitCast(
5872           CGF.GetAddrOfLocalVar(&Param),
5873           CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
5874       C.getPointerType(PrivateType)->castAs<PointerType>());
5875   llvm::Value *Size = nullptr;
5876   // If the size of the reduction item is non-constant, load it from global
5877   // threadprivate variable.
5878   if (RCG.getSizes(N).second) {
5879     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5880         CGF, CGM.getContext().getSizeType(),
5881         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5882     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5883                                 CGM.getContext().getSizeType(), Loc);
5884   }
5885   RCG.emitAggregateType(CGF, N, Size);
5886   Address OrigAddr = Address::invalid();
5887   // If initializer uses initializer from declare reduction construct, emit a
5888   // pointer to the address of the original reduction item (reuired by reduction
5889   // initializer)
5890   if (RCG.usesReductionInitializer(N)) {
5891     Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5892     OrigAddr = CGF.EmitLoadOfPointer(
5893         SharedAddr,
5894         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5895   }
5896   // Emit the initializer:
5897   // %0 = bitcast void* %arg to <type>*
5898   // store <type> <init>, <type>* %0
5899   RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5900                          [](CodeGenFunction &) { return false; });
5901   CGF.FinishFunction();
5902   return Fn;
5903 }
5904 
5905 /// Emits reduction combiner function:
5906 /// \code
5907 /// void @.red_comb(void* %arg0, void* %arg1) {
5908 /// %lhs = bitcast void* %arg0 to <type>*
5909 /// %rhs = bitcast void* %arg1 to <type>*
5910 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5911 /// store <type> %2, <type>* %lhs
5912 /// ret void
5913 /// }
5914 /// \endcode
5915 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5916                                            SourceLocation Loc,
5917                                            ReductionCodeGen &RCG, unsigned N,
5918                                            const Expr *ReductionOp,
5919                                            const Expr *LHS, const Expr *RHS,
5920                                            const Expr *PrivateRef) {
5921   ASTContext &C = CGM.getContext();
5922   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5923   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5924   FunctionArgList Args;
5925   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5926                                C.VoidPtrTy, ImplicitParamDecl::Other);
5927   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5928                             ImplicitParamDecl::Other);
5929   Args.emplace_back(&ParamInOut);
5930   Args.emplace_back(&ParamIn);
5931   const auto &FnInfo =
5932       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5933   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5934   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5935   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5936                                     Name, &CGM.getModule());
5937   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5938   Fn->setDoesNotRecurse();
5939   CodeGenFunction CGF(CGM);
5940   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5941   llvm::Value *Size = nullptr;
5942   // If the size of the reduction item is non-constant, load it from global
5943   // threadprivate variable.
5944   if (RCG.getSizes(N).second) {
5945     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5946         CGF, CGM.getContext().getSizeType(),
5947         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5948     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5949                                 CGM.getContext().getSizeType(), Loc);
5950   }
5951   RCG.emitAggregateType(CGF, N, Size);
5952   // Remap lhs and rhs variables to the addresses of the function arguments.
5953   // %lhs = bitcast void* %arg0 to <type>*
5954   // %rhs = bitcast void* %arg1 to <type>*
5955   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5956   PrivateScope.addPrivate(
5957       LHSVD,
5958       // Pull out the pointer to the variable.
5959       CGF.EmitLoadOfPointer(
5960           CGF.Builder.CreateElementBitCast(
5961               CGF.GetAddrOfLocalVar(&ParamInOut),
5962               CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
5963           C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5964   PrivateScope.addPrivate(
5965       RHSVD,
5966       // Pull out the pointer to the variable.
5967       CGF.EmitLoadOfPointer(
5968           CGF.Builder.CreateElementBitCast(
5969             CGF.GetAddrOfLocalVar(&ParamIn),
5970             CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
5971           C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5972   PrivateScope.Privatize();
5973   // Emit the combiner body:
5974   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5975   // store <type> %2, <type>* %lhs
5976   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5977       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5978       cast<DeclRefExpr>(RHS));
5979   CGF.FinishFunction();
5980   return Fn;
5981 }
5982 
5983 /// Emits reduction finalizer function:
5984 /// \code
5985 /// void @.red_fini(void* %arg) {
5986 /// %0 = bitcast void* %arg to <type>*
5987 /// <destroy>(<type>* %0)
5988 /// ret void
5989 /// }
5990 /// \endcode
5991 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5992                                            SourceLocation Loc,
5993                                            ReductionCodeGen &RCG, unsigned N) {
5994   if (!RCG.needCleanups(N))
5995     return nullptr;
5996   ASTContext &C = CGM.getContext();
5997   FunctionArgList Args;
5998   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5999                           ImplicitParamDecl::Other);
6000   Args.emplace_back(&Param);
6001   const auto &FnInfo =
6002       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6003   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6004   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
6005   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6006                                     Name, &CGM.getModule());
6007   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6008   Fn->setDoesNotRecurse();
6009   CodeGenFunction CGF(CGM);
6010   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6011   Address PrivateAddr = CGF.EmitLoadOfPointer(
6012       CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
6013   llvm::Value *Size = nullptr;
6014   // If the size of the reduction item is non-constant, load it from global
6015   // threadprivate variable.
6016   if (RCG.getSizes(N).second) {
6017     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6018         CGF, CGM.getContext().getSizeType(),
6019         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6020     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6021                                 CGM.getContext().getSizeType(), Loc);
6022   }
6023   RCG.emitAggregateType(CGF, N, Size);
6024   // Emit the finalizer body:
6025   // <destroy>(<type>* %0)
6026   RCG.emitCleanups(CGF, N, PrivateAddr);
6027   CGF.FinishFunction(Loc);
6028   return Fn;
6029 }
6030 
6031 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
6032     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
6033     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
6034   if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
6035     return nullptr;
6036 
6037   // Build typedef struct:
6038   // kmp_taskred_input {
6039   //   void *reduce_shar; // shared reduction item
6040   //   void *reduce_orig; // original reduction item used for initialization
6041   //   size_t reduce_size; // size of data item
6042   //   void *reduce_init; // data initialization routine
6043   //   void *reduce_fini; // data finalization routine
6044   //   void *reduce_comb; // data combiner routine
6045   //   kmp_task_red_flags_t flags; // flags for additional info from compiler
6046   // } kmp_taskred_input_t;
6047   ASTContext &C = CGM.getContext();
6048   RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
6049   RD->startDefinition();
6050   const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6051   const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6052   const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
6053   const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6054   const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6055   const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6056   const FieldDecl *FlagsFD = addFieldToRecordDecl(
6057       C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
6058   RD->completeDefinition();
6059   QualType RDType = C.getRecordType(RD);
6060   unsigned Size = Data.ReductionVars.size();
6061   llvm::APInt ArraySize(/*numBits=*/64, Size);
6062   QualType ArrayRDType = C.getConstantArrayType(
6063       RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
6064   // kmp_task_red_input_t .rd_input.[Size];
6065   Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
6066   ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
6067                        Data.ReductionCopies, Data.ReductionOps);
6068   for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
6069     // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
6070     llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
6071                            llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
6072     llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
6073         TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
6074         /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
6075         ".rd_input.gep.");
6076     LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
6077     // ElemLVal.reduce_shar = &Shareds[Cnt];
6078     LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
6079     RCG.emitSharedOrigLValue(CGF, Cnt);
6080     llvm::Value *CastedShared =
6081         CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
6082     CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
6083     // ElemLVal.reduce_orig = &Origs[Cnt];
6084     LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
6085     llvm::Value *CastedOrig =
6086         CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
6087     CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
6088     RCG.emitAggregateType(CGF, Cnt);
6089     llvm::Value *SizeValInChars;
6090     llvm::Value *SizeVal;
6091     std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6092     // We use delayed creation/initialization for VLAs and array sections. It is
6093     // required because runtime does not provide the way to pass the sizes of
6094     // VLAs/array sections to initializer/combiner/finalizer functions. Instead
6095     // threadprivate global variables are used to store these values and use
6096     // them in the functions.
6097     bool DelayedCreation = !!SizeVal;
6098     SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6099                                                /*isSigned=*/false);
6100     LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6101     CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6102     // ElemLVal.reduce_init = init;
6103     LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6104     llvm::Value *InitAddr =
6105         CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6106     CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6107     // ElemLVal.reduce_fini = fini;
6108     LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6109     llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6110     llvm::Value *FiniAddr = Fini
6111                                 ? CGF.EmitCastToVoidPtr(Fini)
6112                                 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6113     CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6114     // ElemLVal.reduce_comb = comb;
6115     LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6116     llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6117         CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6118         RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6119     CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6120     // ElemLVal.flags = 0;
6121     LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6122     if (DelayedCreation) {
6123       CGF.EmitStoreOfScalar(
6124           llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6125           FlagsLVal);
6126     } else
6127       CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
6128                                  FlagsLVal.getType());
6129   }
6130   if (Data.IsReductionWithTaskMod) {
6131     // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6132     // is_ws, int num, void *data);
6133     llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6134     llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6135                                                   CGM.IntTy, /*isSigned=*/true);
6136     llvm::Value *Args[] = {
6137         IdentTLoc, GTid,
6138         llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
6139                                /*isSigned=*/true),
6140         llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6141         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6142             TaskRedInput.getPointer(), CGM.VoidPtrTy)};
6143     return CGF.EmitRuntimeCall(
6144         OMPBuilder.getOrCreateRuntimeFunction(
6145             CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
6146         Args);
6147   }
6148   // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
6149   llvm::Value *Args[] = {
6150       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6151                                 /*isSigned=*/true),
6152       llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6153       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6154                                                       CGM.VoidPtrTy)};
6155   return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6156                                  CGM.getModule(), OMPRTL___kmpc_taskred_init),
6157                              Args);
6158 }
6159 
6160 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6161                                             SourceLocation Loc,
6162                                             bool IsWorksharingReduction) {
6163   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6164   // is_ws, int num, void *data);
6165   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6166   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6167                                                 CGM.IntTy, /*isSigned=*/true);
6168   llvm::Value *Args[] = {IdentTLoc, GTid,
6169                          llvm::ConstantInt::get(CGM.IntTy,
6170                                                 IsWorksharingReduction ? 1 : 0,
6171                                                 /*isSigned=*/true)};
6172   (void)CGF.EmitRuntimeCall(
6173       OMPBuilder.getOrCreateRuntimeFunction(
6174           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6175       Args);
6176 }
6177 
6178 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6179                                               SourceLocation Loc,
6180                                               ReductionCodeGen &RCG,
6181                                               unsigned N) {
6182   auto Sizes = RCG.getSizes(N);
6183   // Emit threadprivate global variable if the type is non-constant
6184   // (Sizes.second = nullptr).
6185   if (Sizes.second) {
6186     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6187                                                      /*isSigned=*/false);
6188     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6189         CGF, CGM.getContext().getSizeType(),
6190         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6191     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6192   }
6193 }
6194 
6195 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6196                                               SourceLocation Loc,
6197                                               llvm::Value *ReductionsPtr,
6198                                               LValue SharedLVal) {
6199   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6200   // *d);
6201   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6202                                                    CGM.IntTy,
6203                                                    /*isSigned=*/true),
6204                          ReductionsPtr,
6205                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6206                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6207   return Address(
6208       CGF.EmitRuntimeCall(
6209           OMPBuilder.getOrCreateRuntimeFunction(
6210               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6211           Args),
6212       CGF.Int8Ty, SharedLVal.getAlignment());
6213 }
6214 
6215 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
6216                                        const OMPTaskDataTy &Data) {
6217   if (!CGF.HaveInsertPoint())
6218     return;
6219 
6220   if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
6221     // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
6222     OMPBuilder.createTaskwait(CGF.Builder);
6223   } else {
6224     llvm::Value *ThreadID = getThreadID(CGF, Loc);
6225     llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
6226     auto &M = CGM.getModule();
6227     Address DependenciesArray = Address::invalid();
6228     llvm::Value *NumOfElements;
6229     std::tie(NumOfElements, DependenciesArray) =
6230         emitDependClause(CGF, Data.Dependences, Loc);
6231     llvm::Value *DepWaitTaskArgs[6];
6232     if (!Data.Dependences.empty()) {
6233       DepWaitTaskArgs[0] = UpLoc;
6234       DepWaitTaskArgs[1] = ThreadID;
6235       DepWaitTaskArgs[2] = NumOfElements;
6236       DepWaitTaskArgs[3] = DependenciesArray.getPointer();
6237       DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
6238       DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6239 
6240       CodeGenFunction::RunCleanupsScope LocalScope(CGF);
6241 
6242       // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
6243       // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
6244       // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
6245       // is specified.
6246       CGF.EmitRuntimeCall(
6247           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
6248           DepWaitTaskArgs);
6249 
6250     } else {
6251 
6252       // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6253       // global_tid);
6254       llvm::Value *Args[] = {UpLoc, ThreadID};
6255       // Ignore return result until untied tasks are supported.
6256       CGF.EmitRuntimeCall(
6257           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
6258           Args);
6259     }
6260   }
6261 
6262   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6263     Region->emitUntiedSwitch(CGF);
6264 }
6265 
6266 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6267                                            OpenMPDirectiveKind InnerKind,
6268                                            const RegionCodeGenTy &CodeGen,
6269                                            bool HasCancel) {
6270   if (!CGF.HaveInsertPoint())
6271     return;
6272   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6273                                  InnerKind != OMPD_critical &&
6274                                      InnerKind != OMPD_master &&
6275                                      InnerKind != OMPD_masked);
6276   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6277 }
6278 
namespace {
/// Cancellation kinds passed as the 'cncl_kind' argument of the
/// __kmpc_cancel and __kmpc_cancellationpoint runtime calls. The numeric
/// values are emitted into the generated code as 32-bit constants.
enum RTCancelKind {
  /// Initial value; not selected for any cancel region.
  CancelNoreq = 0,
  /// Cancel region is 'parallel'.
  CancelParallel = 1,
  /// Cancel region is 'for'.
  CancelLoop = 2,
  /// Cancel region is 'sections'.
  CancelSections = 3,
  /// Cancel region is 'taskgroup'.
  CancelTaskgroup = 4
};
} // anonymous namespace
6288 
6289 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6290   RTCancelKind CancelKind = CancelNoreq;
6291   if (CancelRegion == OMPD_parallel)
6292     CancelKind = CancelParallel;
6293   else if (CancelRegion == OMPD_for)
6294     CancelKind = CancelLoop;
6295   else if (CancelRegion == OMPD_sections)
6296     CancelKind = CancelSections;
6297   else {
6298     assert(CancelRegion == OMPD_taskgroup);
6299     CancelKind = CancelTaskgroup;
6300   }
6301   return CancelKind;
6302 }
6303 
6304 void CGOpenMPRuntime::emitCancellationPointCall(
6305     CodeGenFunction &CGF, SourceLocation Loc,
6306     OpenMPDirectiveKind CancelRegion) {
6307   if (!CGF.HaveInsertPoint())
6308     return;
6309   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6310   // global_tid, kmp_int32 cncl_kind);
6311   if (auto *OMPRegionInfo =
6312           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6313     // For 'cancellation point taskgroup', the task region info may not have a
6314     // cancel. This may instead happen in another adjacent task.
6315     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6316       llvm::Value *Args[] = {
6317           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6318           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6319       // Ignore return result until untied tasks are supported.
6320       llvm::Value *Result = CGF.EmitRuntimeCall(
6321           OMPBuilder.getOrCreateRuntimeFunction(
6322               CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6323           Args);
6324       // if (__kmpc_cancellationpoint()) {
6325       //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6326       //   exit from construct;
6327       // }
6328       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6329       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6330       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6331       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6332       CGF.EmitBlock(ExitBB);
6333       if (CancelRegion == OMPD_parallel)
6334         emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6335       // exit from construct;
6336       CodeGenFunction::JumpDest CancelDest =
6337           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6338       CGF.EmitBranchThroughCleanup(CancelDest);
6339       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6340     }
6341   }
6342 }
6343 
6344 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6345                                      const Expr *IfCond,
6346                                      OpenMPDirectiveKind CancelRegion) {
6347   if (!CGF.HaveInsertPoint())
6348     return;
6349   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6350   // kmp_int32 cncl_kind);
6351   auto &M = CGM.getModule();
6352   if (auto *OMPRegionInfo =
6353           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6354     auto &&ThenGen = [this, &M, Loc, CancelRegion,
6355                       OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6356       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6357       llvm::Value *Args[] = {
6358           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6359           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6360       // Ignore return result until untied tasks are supported.
6361       llvm::Value *Result = CGF.EmitRuntimeCall(
6362           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6363       // if (__kmpc_cancel()) {
6364       //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6365       //   exit from construct;
6366       // }
6367       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6368       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6369       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6370       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6371       CGF.EmitBlock(ExitBB);
6372       if (CancelRegion == OMPD_parallel)
6373         RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6374       // exit from construct;
6375       CodeGenFunction::JumpDest CancelDest =
6376           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6377       CGF.EmitBranchThroughCleanup(CancelDest);
6378       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6379     };
6380     if (IfCond) {
6381       emitIfClause(CGF, IfCond, ThenGen,
6382                    [](CodeGenFunction &, PrePostActionTy &) {});
6383     } else {
6384       RegionCodeGenTy ThenRCG(ThenGen);
6385       ThenRCG(CGF);
6386     }
6387   }
6388 }
6389 
6390 namespace {
6391 /// Cleanup action for uses_allocators support.
6392 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6393   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6394 
6395 public:
6396   OMPUsesAllocatorsActionTy(
6397       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6398       : Allocators(Allocators) {}
6399   void Enter(CodeGenFunction &CGF) override {
6400     if (!CGF.HaveInsertPoint())
6401       return;
6402     for (const auto &AllocatorData : Allocators) {
6403       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6404           CGF, AllocatorData.first, AllocatorData.second);
6405     }
6406   }
6407   void Exit(CodeGenFunction &CGF) override {
6408     if (!CGF.HaveInsertPoint())
6409       return;
6410     for (const auto &AllocatorData : Allocators) {
6411       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6412                                                         AllocatorData.first);
6413     }
6414   }
6415 };
6416 } // namespace
6417 
6418 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6419     const OMPExecutableDirective &D, StringRef ParentName,
6420     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6421     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6422   assert(!ParentName.empty() && "Invalid target region parent name!");
6423   HasEmittedTargetRegion = true;
6424   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6425   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6426     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6427       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6428       if (!D.AllocatorTraits)
6429         continue;
6430       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6431     }
6432   }
6433   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6434   CodeGen.setAction(UsesAllocatorAction);
6435   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6436                                    IsOffloadEntry, CodeGen);
6437 }
6438 
6439 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6440                                              const Expr *Allocator,
6441                                              const Expr *AllocatorTraits) {
6442   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6443   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6444   // Use default memspace handle.
6445   llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6446   llvm::Value *NumTraits = llvm::ConstantInt::get(
6447       CGF.IntTy, cast<ConstantArrayType>(
6448                      AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6449                      ->getSize()
6450                      .getLimitedValue());
6451   LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6452   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6453       AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
6454   AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6455                                            AllocatorTraitsLVal.getBaseInfo(),
6456                                            AllocatorTraitsLVal.getTBAAInfo());
6457   llvm::Value *Traits =
6458       CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());
6459 
6460   llvm::Value *AllocatorVal =
6461       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6462                               CGM.getModule(), OMPRTL___kmpc_init_allocator),
6463                           {ThreadId, MemSpaceHandle, NumTraits, Traits});
6464   // Store to allocator.
6465   CGF.EmitVarDecl(*cast<VarDecl>(
6466       cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6467   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6468   AllocatorVal =
6469       CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6470                                Allocator->getType(), Allocator->getExprLoc());
6471   CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6472 }
6473 
6474 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6475                                              const Expr *Allocator) {
6476   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6477   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6478   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6479   llvm::Value *AllocatorVal =
6480       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6481   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6482                                           CGF.getContext().VoidPtrTy,
6483                                           Allocator->getExprLoc());
6484   (void)CGF.EmitRuntimeCall(
6485       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6486                                             OMPRTL___kmpc_destroy_allocator),
6487       {ThreadId, AllocatorVal});
6488 }
6489 
6490 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6491     const OMPExecutableDirective &D, StringRef ParentName,
6492     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6493     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6494   // Create a unique name for the entry function using the source location
6495   // information of the current target region. The name will be something like:
6496   //
6497   // __omp_offloading_DD_FFFF_PP_lBB
6498   //
6499   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6500   // mangled name of the function that encloses the target region and BB is the
6501   // line number of the target region.
6502 
6503   const bool BuildOutlinedFn = CGM.getLangOpts().OpenMPIsDevice ||
6504                                !CGM.getLangOpts().OpenMPOffloadMandatory;
6505   unsigned DeviceID;
6506   unsigned FileID;
6507   unsigned Line;
6508   getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6509                            Line);
6510   SmallString<64> EntryFnName;
6511   {
6512     llvm::raw_svector_ostream OS(EntryFnName);
6513     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6514        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6515   }
6516 
6517   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6518 
6519   CodeGenFunction CGF(CGM, true);
6520   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6521   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6522 
6523   if (BuildOutlinedFn)
6524     OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
6525 
6526   // If this target outline function is not an offload entry, we don't need to
6527   // register it.
6528   if (!IsOffloadEntry)
6529     return;
6530 
6531   // The target region ID is used by the runtime library to identify the current
6532   // target region, so it only has to be unique and not necessarily point to
6533   // anything. It could be the pointer to the outlined function that implements
6534   // the target region, but we aren't using that so that the compiler doesn't
6535   // need to keep that, and could therefore inline the host function if proven
6536   // worthwhile during optimization. In the other hand, if emitting code for the
6537   // device, the ID has to be the function address so that it can retrieved from
6538   // the offloading entry and launched by the runtime library. We also mark the
6539   // outlined function to have external linkage in case we are emitting code for
6540   // the device, because these functions will be entry points to the device.
6541 
6542   if (CGM.getLangOpts().OpenMPIsDevice) {
6543     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6544     OutlinedFn->setLinkage(llvm::GlobalValue::WeakODRLinkage);
6545     OutlinedFn->setDSOLocal(false);
6546     if (CGM.getTriple().isAMDGCN())
6547       OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
6548   } else {
6549     std::string Name = getName({EntryFnName, "region_id"});
6550     OutlinedFnID = new llvm::GlobalVariable(
6551         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6552         llvm::GlobalValue::WeakAnyLinkage,
6553         llvm::Constant::getNullValue(CGM.Int8Ty), Name);
6554   }
6555 
6556   // If we do not allow host fallback we still need a named address to use.
6557   llvm::Constant *TargetRegionEntryAddr = OutlinedFn;
6558   if (!BuildOutlinedFn) {
6559     assert(!CGM.getModule().getGlobalVariable(EntryFnName, true) &&
6560            "Named kernel already exists?");
6561     TargetRegionEntryAddr = new llvm::GlobalVariable(
6562         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6563         llvm::GlobalValue::InternalLinkage,
6564         llvm::Constant::getNullValue(CGM.Int8Ty), EntryFnName);
6565   }
6566 
6567   // Register the information for the entry associated with this target region.
6568   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
6569       DeviceID, FileID, ParentName, Line, TargetRegionEntryAddr, OutlinedFnID,
6570       OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
6571 
6572   // Add NumTeams and ThreadLimit attributes to the outlined GPU function
6573   int32_t DefaultValTeams = -1;
6574   getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
6575   if (DefaultValTeams > 0 && OutlinedFn) {
6576     OutlinedFn->addFnAttr("omp_target_num_teams",
6577                           std::to_string(DefaultValTeams));
6578   }
6579   int32_t DefaultValThreads = -1;
6580   getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
6581   if (DefaultValThreads > 0 && OutlinedFn) {
6582     OutlinedFn->addFnAttr("omp_target_thread_limit",
6583                           std::to_string(DefaultValThreads));
6584   }
6585 
6586   if (BuildOutlinedFn)
6587     CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
6588 }
6589 
6590 /// Checks if the expression is constant or does not have non-trivial function
6591 /// calls.
6592 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6593   // We can skip constant expressions.
6594   // We can skip expressions with trivial calls or simple expressions.
6595   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6596           !E->hasNonTrivialCall(Ctx)) &&
6597          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6598 }
6599 
6600 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6601                                                     const Stmt *Body) {
6602   const Stmt *Child = Body->IgnoreContainers();
6603   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6604     Child = nullptr;
6605     for (const Stmt *S : C->body()) {
6606       if (const auto *E = dyn_cast<Expr>(S)) {
6607         if (isTrivial(Ctx, E))
6608           continue;
6609       }
6610       // Some of the statements can be ignored.
6611       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6612           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6613         continue;
6614       // Analyze declarations.
6615       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6616         if (llvm::all_of(DS->decls(), [](const Decl *D) {
6617               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6618                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6619                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6620                   isa<UsingDirectiveDecl>(D) ||
6621                   isa<OMPDeclareReductionDecl>(D) ||
6622                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6623                 return true;
6624               const auto *VD = dyn_cast<VarDecl>(D);
6625               if (!VD)
6626                 return false;
6627               return VD->hasGlobalStorage() || !VD->isUsed();
6628             }))
6629           continue;
6630       }
6631       // Found multiple children - cannot get the one child only.
6632       if (Child)
6633         return nullptr;
6634       Child = S;
6635     }
6636     if (Child)
6637       Child = Child->IgnoreContainers();
6638   }
6639   return Child;
6640 }
6641 
6642 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6643     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6644     int32_t &DefaultVal) {
6645 
6646   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6647   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6648          "Expected target-based executable directive.");
6649   switch (DirectiveKind) {
6650   case OMPD_target: {
6651     const auto *CS = D.getInnermostCapturedStmt();
6652     const auto *Body =
6653         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6654     const Stmt *ChildStmt =
6655         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6656     if (const auto *NestedDir =
6657             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6658       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6659         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6660           const Expr *NumTeams =
6661               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6662           if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6663             if (auto Constant =
6664                     NumTeams->getIntegerConstantExpr(CGF.getContext()))
6665               DefaultVal = Constant->getExtValue();
6666           return NumTeams;
6667         }
6668         DefaultVal = 0;
6669         return nullptr;
6670       }
6671       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6672           isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
6673         DefaultVal = 1;
6674         return nullptr;
6675       }
6676       DefaultVal = 1;
6677       return nullptr;
6678     }
6679     // A value of -1 is used to check if we need to emit no teams region
6680     DefaultVal = -1;
6681     return nullptr;
6682   }
6683   case OMPD_target_teams:
6684   case OMPD_target_teams_distribute:
6685   case OMPD_target_teams_distribute_simd:
6686   case OMPD_target_teams_distribute_parallel_for:
6687   case OMPD_target_teams_distribute_parallel_for_simd: {
6688     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6689       const Expr *NumTeams =
6690           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6691       if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6692         if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6693           DefaultVal = Constant->getExtValue();
6694       return NumTeams;
6695     }
6696     DefaultVal = 0;
6697     return nullptr;
6698   }
6699   case OMPD_target_parallel:
6700   case OMPD_target_parallel_for:
6701   case OMPD_target_parallel_for_simd:
6702   case OMPD_target_simd:
6703     DefaultVal = 1;
6704     return nullptr;
6705   case OMPD_parallel:
6706   case OMPD_for:
6707   case OMPD_parallel_for:
6708   case OMPD_parallel_master:
6709   case OMPD_parallel_sections:
6710   case OMPD_for_simd:
6711   case OMPD_parallel_for_simd:
6712   case OMPD_cancel:
6713   case OMPD_cancellation_point:
6714   case OMPD_ordered:
6715   case OMPD_threadprivate:
6716   case OMPD_allocate:
6717   case OMPD_task:
6718   case OMPD_simd:
6719   case OMPD_tile:
6720   case OMPD_unroll:
6721   case OMPD_sections:
6722   case OMPD_section:
6723   case OMPD_single:
6724   case OMPD_master:
6725   case OMPD_critical:
6726   case OMPD_taskyield:
6727   case OMPD_barrier:
6728   case OMPD_taskwait:
6729   case OMPD_taskgroup:
6730   case OMPD_atomic:
6731   case OMPD_flush:
6732   case OMPD_depobj:
6733   case OMPD_scan:
6734   case OMPD_teams:
6735   case OMPD_target_data:
6736   case OMPD_target_exit_data:
6737   case OMPD_target_enter_data:
6738   case OMPD_distribute:
6739   case OMPD_distribute_simd:
6740   case OMPD_distribute_parallel_for:
6741   case OMPD_distribute_parallel_for_simd:
6742   case OMPD_teams_distribute:
6743   case OMPD_teams_distribute_simd:
6744   case OMPD_teams_distribute_parallel_for:
6745   case OMPD_teams_distribute_parallel_for_simd:
6746   case OMPD_target_update:
6747   case OMPD_declare_simd:
6748   case OMPD_declare_variant:
6749   case OMPD_begin_declare_variant:
6750   case OMPD_end_declare_variant:
6751   case OMPD_declare_target:
6752   case OMPD_end_declare_target:
6753   case OMPD_declare_reduction:
6754   case OMPD_declare_mapper:
6755   case OMPD_taskloop:
6756   case OMPD_taskloop_simd:
6757   case OMPD_master_taskloop:
6758   case OMPD_master_taskloop_simd:
6759   case OMPD_parallel_master_taskloop:
6760   case OMPD_parallel_master_taskloop_simd:
6761   case OMPD_requires:
6762   case OMPD_metadirective:
6763   case OMPD_unknown:
6764     break;
6765   default:
6766     break;
6767   }
6768   llvm_unreachable("Unexpected directive kind.");
6769 }
6770 
6771 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6772     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6773   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6774          "Clauses associated with the teams directive expected to be emitted "
6775          "only for the host!");
6776   CGBuilderTy &Bld = CGF.Builder;
6777   int32_t DefaultNT = -1;
6778   const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
6779   if (NumTeams != nullptr) {
6780     OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6781 
6782     switch (DirectiveKind) {
6783     case OMPD_target: {
6784       const auto *CS = D.getInnermostCapturedStmt();
6785       CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6786       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6787       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6788                                                   /*IgnoreResultAssign*/ true);
6789       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6790                              /*isSigned=*/true);
6791     }
6792     case OMPD_target_teams:
6793     case OMPD_target_teams_distribute:
6794     case OMPD_target_teams_distribute_simd:
6795     case OMPD_target_teams_distribute_parallel_for:
6796     case OMPD_target_teams_distribute_parallel_for_simd: {
6797       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6798       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6799                                                   /*IgnoreResultAssign*/ true);
6800       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6801                              /*isSigned=*/true);
6802     }
6803     default:
6804       break;
6805     }
6806   } else if (DefaultNT == -1) {
6807     return nullptr;
6808   }
6809 
6810   return Bld.getInt32(DefaultNT);
6811 }
6812 
6813 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6814                                   llvm::Value *DefaultThreadLimitVal) {
6815   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6816       CGF.getContext(), CS->getCapturedStmt());
6817   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6818     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6819       llvm::Value *NumThreads = nullptr;
6820       llvm::Value *CondVal = nullptr;
6821       // Handle if clause. If if clause present, the number of threads is
6822       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6823       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6824         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6825         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6826         const OMPIfClause *IfClause = nullptr;
6827         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6828           if (C->getNameModifier() == OMPD_unknown ||
6829               C->getNameModifier() == OMPD_parallel) {
6830             IfClause = C;
6831             break;
6832           }
6833         }
6834         if (IfClause) {
6835           const Expr *Cond = IfClause->getCondition();
6836           bool Result;
6837           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6838             if (!Result)
6839               return CGF.Builder.getInt32(1);
6840           } else {
6841             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6842             if (const auto *PreInit =
6843                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6844               for (const auto *I : PreInit->decls()) {
6845                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6846                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6847                 } else {
6848                   CodeGenFunction::AutoVarEmission Emission =
6849                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6850                   CGF.EmitAutoVarCleanups(Emission);
6851                 }
6852               }
6853             }
6854             CondVal = CGF.EvaluateExprAsBool(Cond);
6855           }
6856         }
6857       }
6858       // Check the value of num_threads clause iff if clause was not specified
6859       // or is not evaluated to false.
6860       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6861         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6862         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6863         const auto *NumThreadsClause =
6864             Dir->getSingleClause<OMPNumThreadsClause>();
6865         CodeGenFunction::LexicalScope Scope(
6866             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6867         if (const auto *PreInit =
6868                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6869           for (const auto *I : PreInit->decls()) {
6870             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6871               CGF.EmitVarDecl(cast<VarDecl>(*I));
6872             } else {
6873               CodeGenFunction::AutoVarEmission Emission =
6874                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6875               CGF.EmitAutoVarCleanups(Emission);
6876             }
6877           }
6878         }
6879         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6880         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6881                                                /*isSigned=*/false);
6882         if (DefaultThreadLimitVal)
6883           NumThreads = CGF.Builder.CreateSelect(
6884               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6885               DefaultThreadLimitVal, NumThreads);
6886       } else {
6887         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6888                                            : CGF.Builder.getInt32(0);
6889       }
6890       // Process condition of the if clause.
6891       if (CondVal) {
6892         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6893                                               CGF.Builder.getInt32(1));
6894       }
6895       return NumThreads;
6896     }
6897     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6898       return CGF.Builder.getInt32(1);
6899     return DefaultThreadLimitVal;
6900   }
6901   return DefaultThreadLimitVal ? DefaultThreadLimitVal
6902                                : CGF.Builder.getInt32(0);
6903 }
6904 
6905 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6906     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6907     int32_t &DefaultVal) {
6908   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6909   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6910          "Expected target-based executable directive.");
6911 
6912   switch (DirectiveKind) {
6913   case OMPD_target:
6914     // Teams have no clause thread_limit
6915     return nullptr;
6916   case OMPD_target_teams:
6917   case OMPD_target_teams_distribute:
6918     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6919       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6920       const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
6921       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6922         if (auto Constant =
6923                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6924           DefaultVal = Constant->getExtValue();
6925       return ThreadLimit;
6926     }
6927     return nullptr;
6928   case OMPD_target_parallel:
6929   case OMPD_target_parallel_for:
6930   case OMPD_target_parallel_for_simd:
6931   case OMPD_target_teams_distribute_parallel_for:
6932   case OMPD_target_teams_distribute_parallel_for_simd: {
6933     Expr *ThreadLimit = nullptr;
6934     Expr *NumThreads = nullptr;
6935     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6936       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6937       ThreadLimit = ThreadLimitClause->getThreadLimit();
6938       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6939         if (auto Constant =
6940                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6941           DefaultVal = Constant->getExtValue();
6942     }
6943     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6944       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6945       NumThreads = NumThreadsClause->getNumThreads();
6946       if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
6947         if (auto Constant =
6948                 NumThreads->getIntegerConstantExpr(CGF.getContext())) {
6949           if (Constant->getExtValue() < DefaultVal) {
6950             DefaultVal = Constant->getExtValue();
6951             ThreadLimit = NumThreads;
6952           }
6953         }
6954       }
6955     }
6956     return ThreadLimit;
6957   }
6958   case OMPD_target_teams_distribute_simd:
6959   case OMPD_target_simd:
6960     DefaultVal = 1;
6961     return nullptr;
6962   case OMPD_parallel:
6963   case OMPD_for:
6964   case OMPD_parallel_for:
6965   case OMPD_parallel_master:
6966   case OMPD_parallel_sections:
6967   case OMPD_for_simd:
6968   case OMPD_parallel_for_simd:
6969   case OMPD_cancel:
6970   case OMPD_cancellation_point:
6971   case OMPD_ordered:
6972   case OMPD_threadprivate:
6973   case OMPD_allocate:
6974   case OMPD_task:
6975   case OMPD_simd:
6976   case OMPD_tile:
6977   case OMPD_unroll:
6978   case OMPD_sections:
6979   case OMPD_section:
6980   case OMPD_single:
6981   case OMPD_master:
6982   case OMPD_critical:
6983   case OMPD_taskyield:
6984   case OMPD_barrier:
6985   case OMPD_taskwait:
6986   case OMPD_taskgroup:
6987   case OMPD_atomic:
6988   case OMPD_flush:
6989   case OMPD_depobj:
6990   case OMPD_scan:
6991   case OMPD_teams:
6992   case OMPD_target_data:
6993   case OMPD_target_exit_data:
6994   case OMPD_target_enter_data:
6995   case OMPD_distribute:
6996   case OMPD_distribute_simd:
6997   case OMPD_distribute_parallel_for:
6998   case OMPD_distribute_parallel_for_simd:
6999   case OMPD_teams_distribute:
7000   case OMPD_teams_distribute_simd:
7001   case OMPD_teams_distribute_parallel_for:
7002   case OMPD_teams_distribute_parallel_for_simd:
7003   case OMPD_target_update:
7004   case OMPD_declare_simd:
7005   case OMPD_declare_variant:
7006   case OMPD_begin_declare_variant:
7007   case OMPD_end_declare_variant:
7008   case OMPD_declare_target:
7009   case OMPD_end_declare_target:
7010   case OMPD_declare_reduction:
7011   case OMPD_declare_mapper:
7012   case OMPD_taskloop:
7013   case OMPD_taskloop_simd:
7014   case OMPD_master_taskloop:
7015   case OMPD_master_taskloop_simd:
7016   case OMPD_parallel_master_taskloop:
7017   case OMPD_parallel_master_taskloop_simd:
7018   case OMPD_requires:
7019   case OMPD_unknown:
7020     break;
7021   default:
7022     break;
7023   }
7024   llvm_unreachable("Unsupported directive kind.");
7025 }
7026 
7027 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
7028     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
7029   assert(!CGF.getLangOpts().OpenMPIsDevice &&
7030          "Clauses associated with the teams directive expected to be emitted "
7031          "only for the host!");
7032   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
7033   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
7034          "Expected target-based executable directive.");
7035   CGBuilderTy &Bld = CGF.Builder;
7036   llvm::Value *ThreadLimitVal = nullptr;
7037   llvm::Value *NumThreadsVal = nullptr;
7038   switch (DirectiveKind) {
7039   case OMPD_target: {
7040     const CapturedStmt *CS = D.getInnermostCapturedStmt();
7041     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7042       return NumThreads;
7043     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7044         CGF.getContext(), CS->getCapturedStmt());
7045     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
7046       if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
7047         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
7048         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
7049         const auto *ThreadLimitClause =
7050             Dir->getSingleClause<OMPThreadLimitClause>();
7051         CodeGenFunction::LexicalScope Scope(
7052             CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
7053         if (const auto *PreInit =
7054                 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
7055           for (const auto *I : PreInit->decls()) {
7056             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
7057               CGF.EmitVarDecl(cast<VarDecl>(*I));
7058             } else {
7059               CodeGenFunction::AutoVarEmission Emission =
7060                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
7061               CGF.EmitAutoVarCleanups(Emission);
7062             }
7063           }
7064         }
7065         llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7066             ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7067         ThreadLimitVal =
7068             Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7069       }
7070       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
7071           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
7072         CS = Dir->getInnermostCapturedStmt();
7073         const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7074             CGF.getContext(), CS->getCapturedStmt());
7075         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
7076       }
7077       if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
7078           !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
7079         CS = Dir->getInnermostCapturedStmt();
7080         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7081           return NumThreads;
7082       }
7083       if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
7084         return Bld.getInt32(1);
7085     }
7086     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
7087   }
7088   case OMPD_target_teams: {
7089     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7090       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7091       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7092       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7093           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7094       ThreadLimitVal =
7095           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7096     }
7097     const CapturedStmt *CS = D.getInnermostCapturedStmt();
7098     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7099       return NumThreads;
7100     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7101         CGF.getContext(), CS->getCapturedStmt());
7102     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
7103       if (Dir->getDirectiveKind() == OMPD_distribute) {
7104         CS = Dir->getInnermostCapturedStmt();
7105         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7106           return NumThreads;
7107       }
7108     }
7109     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
7110   }
7111   case OMPD_target_teams_distribute:
7112     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7113       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7114       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7115       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7116           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7117       ThreadLimitVal =
7118           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7119     }
7120     return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
7121   case OMPD_target_parallel:
7122   case OMPD_target_parallel_for:
7123   case OMPD_target_parallel_for_simd:
7124   case OMPD_target_teams_distribute_parallel_for:
7125   case OMPD_target_teams_distribute_parallel_for_simd: {
7126     llvm::Value *CondVal = nullptr;
7127     // Handle if clause. If if clause present, the number of threads is
7128     // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
7129     if (D.hasClausesOfKind<OMPIfClause>()) {
7130       const OMPIfClause *IfClause = nullptr;
7131       for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
7132         if (C->getNameModifier() == OMPD_unknown ||
7133             C->getNameModifier() == OMPD_parallel) {
7134           IfClause = C;
7135           break;
7136         }
7137       }
7138       if (IfClause) {
7139         const Expr *Cond = IfClause->getCondition();
7140         bool Result;
7141         if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
7142           if (!Result)
7143             return Bld.getInt32(1);
7144         } else {
7145           CodeGenFunction::RunCleanupsScope Scope(CGF);
7146           CondVal = CGF.EvaluateExprAsBool(Cond);
7147         }
7148       }
7149     }
7150     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7151       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7152       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7153       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7154           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7155       ThreadLimitVal =
7156           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7157     }
7158     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
7159       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
7160       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
7161       llvm::Value *NumThreads = CGF.EmitScalarExpr(
7162           NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
7163       NumThreadsVal =
7164           Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
7165       ThreadLimitVal = ThreadLimitVal
7166                            ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
7167                                                                 ThreadLimitVal),
7168                                               NumThreadsVal, ThreadLimitVal)
7169                            : NumThreadsVal;
7170     }
7171     if (!ThreadLimitVal)
7172       ThreadLimitVal = Bld.getInt32(0);
7173     if (CondVal)
7174       return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
7175     return ThreadLimitVal;
7176   }
7177   case OMPD_target_teams_distribute_simd:
7178   case OMPD_target_simd:
7179     return Bld.getInt32(1);
7180   case OMPD_parallel:
7181   case OMPD_for:
7182   case OMPD_parallel_for:
7183   case OMPD_parallel_master:
7184   case OMPD_parallel_sections:
7185   case OMPD_for_simd:
7186   case OMPD_parallel_for_simd:
7187   case OMPD_cancel:
7188   case OMPD_cancellation_point:
7189   case OMPD_ordered:
7190   case OMPD_threadprivate:
7191   case OMPD_allocate:
7192   case OMPD_task:
7193   case OMPD_simd:
7194   case OMPD_tile:
7195   case OMPD_unroll:
7196   case OMPD_sections:
7197   case OMPD_section:
7198   case OMPD_single:
7199   case OMPD_master:
7200   case OMPD_critical:
7201   case OMPD_taskyield:
7202   case OMPD_barrier:
7203   case OMPD_taskwait:
7204   case OMPD_taskgroup:
7205   case OMPD_atomic:
7206   case OMPD_flush:
7207   case OMPD_depobj:
7208   case OMPD_scan:
7209   case OMPD_teams:
7210   case OMPD_target_data:
7211   case OMPD_target_exit_data:
7212   case OMPD_target_enter_data:
7213   case OMPD_distribute:
7214   case OMPD_distribute_simd:
7215   case OMPD_distribute_parallel_for:
7216   case OMPD_distribute_parallel_for_simd:
7217   case OMPD_teams_distribute:
7218   case OMPD_teams_distribute_simd:
7219   case OMPD_teams_distribute_parallel_for:
7220   case OMPD_teams_distribute_parallel_for_simd:
7221   case OMPD_target_update:
7222   case OMPD_declare_simd:
7223   case OMPD_declare_variant:
7224   case OMPD_begin_declare_variant:
7225   case OMPD_end_declare_variant:
7226   case OMPD_declare_target:
7227   case OMPD_end_declare_target:
7228   case OMPD_declare_reduction:
7229   case OMPD_declare_mapper:
7230   case OMPD_taskloop:
7231   case OMPD_taskloop_simd:
7232   case OMPD_master_taskloop:
7233   case OMPD_master_taskloop_simd:
7234   case OMPD_parallel_master_taskloop:
7235   case OMPD_parallel_master_taskloop_simd:
7236   case OMPD_requires:
7237   case OMPD_metadirective:
7238   case OMPD_unknown:
7239     break;
7240   default:
7241     break;
7242   }
7243   llvm_unreachable("Unsupported directive kind.");
7244 }
7245 
7246 namespace {
7247 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7248 
7249 // Utility to handle information from clauses associated with a given
7250 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7251 // It provides a convenient interface to obtain the information and generate
7252 // code for that information.
7253 class MappableExprsHandler {
7254 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. The enumerators are combined with bitwise operators, which
  /// LLVM_MARK_AS_BITMASK_ENUM enables for this enum.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    // Note: 0x800 is not used here; it is reserved for compatibility with XLC.

    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// Increment and decrement a separate reference counter so that the data
    /// cannot be unmapped within the associated region.  Thus, this flag is
    /// intended to be used on 'target' and 'target data' directives because
    /// they are inherently structured.  It is not intended to be used on
    /// 'target enter data' and 'target exit data' directives because they are
    /// inherently dynamic.
    /// This is an OpenMP extension for the sake of OpenACC support.
    OMP_MAP_OMPX_HOLD = 0x2000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class. This is a multi-bit field mask (bits 48-63), not a
    /// single flag.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7310 
7311   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7312   static unsigned getFlagMemberOffset() {
7313     unsigned Offset = 0;
7314     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7315          Remain = Remain >> 1)
7316       Offset++;
7317     return Offset;
7318   }
7319 
7320   /// Class that holds debugging information for a data mapping to be passed to
7321   /// the runtime library.
7322   class MappingExprInfo {
7323     /// The variable declaration used for the data mapping.
7324     const ValueDecl *MapDecl = nullptr;
7325     /// The original expression used in the map clause, or null if there is
7326     /// none.
7327     const Expr *MapExpr = nullptr;
7328 
7329   public:
7330     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7331         : MapDecl(MapDecl), MapExpr(MapExpr) {}
7332 
7333     const ValueDecl *getMapDecl() const { return MapDecl; }
7334     const Expr *getMapExpr() const { return MapExpr; }
7335   };
7336 
7337   /// Class that associates information with a base pointer to be passed to the
7338   /// runtime library.
7339   class BasePointerInfo {
7340     /// The base pointer.
7341     llvm::Value *Ptr = nullptr;
7342     /// The base declaration that refers to this device pointer, or null if
7343     /// there is none.
7344     const ValueDecl *DevPtrDecl = nullptr;
7345 
7346   public:
7347     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7348         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7349     llvm::Value *operator*() const { return Ptr; }
7350     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7351     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7352   };
7353 
  // Array types used by MapCombinedInfoTy. All of them are SmallVectors with
  // inline storage for four elements.

  /// Debug information (MappingExprInfo) entries.
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  /// Base pointer (BasePointerInfo) entries.
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  /// Plain list of IR values; MapCombinedInfoTy uses it for its Pointers and
  /// Sizes arrays.
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  /// Mapping flag (OpenMPOffloadMappingFlags) entries.
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  /// Declarations of user-defined mappers.
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  /// Dimension counts of the non-contiguous information.
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  /// List of value lists; used for the Offsets, Counts and Strides of the
  /// non-contiguous information.
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7361 
7362   /// This structure contains combined information generated for mappable
7363   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7364   /// mappers, and non-contiguous information.
7365   struct MapCombinedInfoTy {
7366     struct StructNonContiguousInfo {
7367       bool IsNonContiguous = false;
7368       MapDimArrayTy Dims;
7369       MapNonContiguousArrayTy Offsets;
7370       MapNonContiguousArrayTy Counts;
7371       MapNonContiguousArrayTy Strides;
7372     };
7373     MapExprsArrayTy Exprs;
7374     MapBaseValuesArrayTy BasePointers;
7375     MapValuesArrayTy Pointers;
7376     MapValuesArrayTy Sizes;
7377     MapFlagsArrayTy Types;
7378     MapMappersArrayTy Mappers;
7379     StructNonContiguousInfo NonContigInfo;
7380 
7381     /// Append arrays in \a CurInfo.
7382     void append(MapCombinedInfoTy &CurInfo) {
7383       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7384       BasePointers.append(CurInfo.BasePointers.begin(),
7385                           CurInfo.BasePointers.end());
7386       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7387       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7388       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7389       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7390       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7391                                  CurInfo.NonContigInfo.Dims.end());
7392       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7393                                     CurInfo.NonContigInfo.Offsets.end());
7394       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7395                                    CurInfo.NonContigInfo.Counts.end());
7396       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7397                                     CurInfo.NonContigInfo.Strides.end());
7398     }
7399   };
7400 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  ///
  /// Every address member starts out as Address::invalid() and both flags
  /// start out false, so a default-constructed object describes "nothing
  /// recorded yet".
  struct StructRangeInfoTy {
    MapCombinedInfoTy PreliminaryMapData;
    /// (field index, address) of the lowest mapped element.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// (field index, address) of the highest mapped element.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };
7416 
7417 private:
  /// Information recorded for one list of mappable-expression components: the
  /// component list itself, its map type, its map and motion modifiers, an
  /// optional mapper declaration and variable reference, and a few flags
  /// (including whether a device pointer has to be returned).
  ///
  /// NOTE(review): Components, MapModifiers and MotionModifiers are stored as
  /// reference/ArrayRef views, so the storage they refer to presumably has to
  /// outlive this object - confirm against the callers.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    /// Leaves every member at its in-class default.
    MapInfo() = default;
    /// Stores each argument in the member of the same name. \a Mapper,
    /// \a VarRef and \a ForDeviceAddr are optional and default to the same
    /// values as the in-class initializers.
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
7444 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  ///
  /// This record holds what is needed to emit such a deferred entry later.
  struct DeferredDevicePtrEntryTy {
    // Expression associated with the deferred entry.
    const Expr *IE = nullptr;
    // Declaration associated with the deferred entry.
    const ValueDecl *VD = nullptr;
    // NOTE(review): presumably true for use_device_addr and false for
    // use_device_ptr - confirm against the code that creates these entries.
    bool ForDeviceAddr = false;

    /// Stores the three arguments in the members of the same name.
    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7457 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// Each declaration maps to a list of component lists.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between lambda declarations and the map clause recorded for them.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7481 
7482   llvm::Value *getExprTypeSize(const Expr *E) const {
7483     QualType ExprTy = E->getType().getCanonicalType();
7484 
7485     // Calculate the size for array shaping expression.
7486     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7487       llvm::Value *Size =
7488           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7489       for (const Expr *SE : OAE->getDimensions()) {
7490         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7491         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7492                                       CGF.getContext().getSizeType(),
7493                                       SE->getExprLoc());
7494         Size = CGF.Builder.CreateNUWMul(Size, Sz);
7495       }
7496       return Size;
7497     }
7498 
7499     // Reference types are ignored for mapping purposes.
7500     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7501       ExprTy = RefTy->getPointeeType().getCanonicalType();
7502 
7503     // Given that an array section is considered a built-in type, we need to
7504     // do the calculation based on the length of the section instead of relying
7505     // on CGF.getTypeSize(E->getType()).
7506     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7507       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7508                             OAE->getBase()->IgnoreParenImpCasts())
7509                             .getCanonicalType();
7510 
7511       // If there is no length associated with the expression and lower bound is
7512       // not specified too, that means we are using the whole length of the
7513       // base.
7514       if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7515           !OAE->getLowerBound())
7516         return CGF.getTypeSize(BaseTy);
7517 
7518       llvm::Value *ElemSize;
7519       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7520         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7521       } else {
7522         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7523         assert(ATy && "Expecting array type if not a pointer type.");
7524         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7525       }
7526 
7527       // If we don't have a length at this point, that is because we have an
7528       // array section with a single element.
7529       if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7530         return ElemSize;
7531 
7532       if (const Expr *LenExpr = OAE->getLength()) {
7533         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7534         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7535                                              CGF.getContext().getSizeType(),
7536                                              LenExpr->getExprLoc());
7537         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7538       }
7539       assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7540              OAE->getLowerBound() && "expected array_section[lb:].");
7541       // Size = sizetype - lb * elemtype;
7542       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7543       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7544       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7545                                        CGF.getContext().getSizeType(),
7546                                        OAE->getLowerBound()->getExprLoc());
7547       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7548       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7549       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7550       LengthVal = CGF.Builder.CreateSelect(
7551           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7552       return LengthVal;
7553     }
7554     return CGF.getTypeSize(ExprTy);
7555   }
7556 
7557   /// Return the corresponding bits for a given map clause modifier. Add
7558   /// a flag marking the map as a pointer if requested. Add a flag marking the
7559   /// map as the first one of a series of maps that relate to the same map
7560   /// expression.
7561   OpenMPOffloadMappingFlags getMapTypeBits(
7562       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7563       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7564       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7565     OpenMPOffloadMappingFlags Bits =
7566         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7567     switch (MapType) {
7568     case OMPC_MAP_alloc:
7569     case OMPC_MAP_release:
7570       // alloc and release is the default behavior in the runtime library,  i.e.
7571       // if we don't pass any bits alloc/release that is what the runtime is
7572       // going to do. Therefore, we don't need to signal anything for these two
7573       // type modifiers.
7574       break;
7575     case OMPC_MAP_to:
7576       Bits |= OMP_MAP_TO;
7577       break;
7578     case OMPC_MAP_from:
7579       Bits |= OMP_MAP_FROM;
7580       break;
7581     case OMPC_MAP_tofrom:
7582       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7583       break;
7584     case OMPC_MAP_delete:
7585       Bits |= OMP_MAP_DELETE;
7586       break;
7587     case OMPC_MAP_unknown:
7588       llvm_unreachable("Unexpected map type!");
7589     }
7590     if (AddPtrFlag)
7591       Bits |= OMP_MAP_PTR_AND_OBJ;
7592     if (AddIsTargetParamFlag)
7593       Bits |= OMP_MAP_TARGET_PARAM;
7594     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7595       Bits |= OMP_MAP_ALWAYS;
7596     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7597       Bits |= OMP_MAP_CLOSE;
7598     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7599         llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7600       Bits |= OMP_MAP_PRESENT;
7601     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7602       Bits |= OMP_MAP_OMPX_HOLD;
7603     if (IsNonContiguous)
7604       Bits |= OMP_MAP_NON_CONTIG;
7605     return Bits;
7606   }
7607 
7608   /// Return true if the provided expression is a final array section. A
7609   /// final array section, is one whose length can't be proved to be one.
7610   bool isFinalArraySectionExpression(const Expr *E) const {
7611     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7612 
7613     // It is not an array section and therefore not a unity-size one.
7614     if (!OASE)
7615       return false;
7616 
7617     // An array section with no colon always refer to a single element.
7618     if (OASE->getColonLocFirst().isInvalid())
7619       return false;
7620 
7621     const Expr *Length = OASE->getLength();
7622 
7623     // If we don't have a length we have to check if the array has size 1
7624     // for this dimension. Also, we should always expect a length if the
7625     // base type is pointer.
7626     if (!Length) {
7627       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7628                              OASE->getBase()->IgnoreParenImpCasts())
7629                              .getCanonicalType();
7630       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7631         return ATy->getSize().getSExtValue() != 1;
7632       // If we don't have a constant dimension length, we have to consider
7633       // the current section as having any size, so it is not necessarily
7634       // unitary. If it happen to be unity size, that's user fault.
7635       return true;
7636     }
7637 
7638     // Check if the length evaluates to 1.
7639     Expr::EvalResult Result;
7640     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7641       return true; // Can have more that size 1.
7642 
7643     llvm::APSInt ConstLength = Result.Val.getInt();
7644     return ConstLength.getSExtValue() != 1;
7645   }
7646 
7647   /// Generate the base pointers, section pointers, sizes, map type bits, and
7648   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7649   /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
7652   void generateInfoForComponentList(
7653       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7654       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7655       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7656       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7657       bool IsFirstComponentList, bool IsImplicit,
7658       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7659       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7660       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7661           OverlappedElements = llvm::None) const {
7662     // The following summarizes what has to be generated for each map and the
7663     // types below. The generated information is expressed in this order:
7664     // base pointer, section pointer, size, flags
7665     // (to add to the ones that come from the map type and modifier).
7666     //
7667     // double d;
7668     // int i[100];
7669     // float *p;
7670     //
7671     // struct S1 {
7672     //   int i;
7673     //   float f[50];
7674     // }
7675     // struct S2 {
7676     //   int i;
7677     //   float f[50];
7678     //   S1 s;
7679     //   double *p;
7680     //   struct S2 *ps;
7681     //   int &ref;
7682     // }
7683     // S2 s;
7684     // S2 *ps;
7685     //
7686     // map(d)
7687     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7688     //
7689     // map(i)
7690     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7691     //
7692     // map(i[1:23])
7693     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7694     //
7695     // map(p)
7696     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7697     //
7698     // map(p[1:24])
7699     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7700     // in unified shared memory mode or for local pointers
7701     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7702     //
7703     // map(s)
7704     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7705     //
7706     // map(s.i)
7707     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7708     //
7709     // map(s.s.f)
7710     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7711     //
7712     // map(s.p)
7713     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7714     //
7715     // map(to: s.p[:22])
7716     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7717     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7718     // &(s.p), &(s.p[0]), 22*sizeof(double),
7719     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7720     // (*) alloc space for struct members, only this is a target parameter
7721     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7722     //      optimizes this entry out, same in the examples below)
7723     // (***) map the pointee (map: to)
7724     //
7725     // map(to: s.ref)
7726     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7727     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7728     // (*) alloc space for struct members, only this is a target parameter
7729     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7730     //      optimizes this entry out, same in the examples below)
7731     // (***) map the pointee (map: to)
7732     //
7733     // map(s.ps)
7734     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7735     //
7736     // map(from: s.ps->s.i)
7737     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7738     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7739     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7740     //
7741     // map(to: s.ps->ps)
7742     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7743     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7744     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7745     //
7746     // map(s.ps->ps->ps)
7747     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7748     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7749     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7750     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7751     //
7752     // map(to: s.ps->ps->s.f[:22])
7753     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7754     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7755     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7756     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7757     //
7758     // map(ps)
7759     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7760     //
7761     // map(ps->i)
7762     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7763     //
7764     // map(ps->s.f)
7765     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7766     //
7767     // map(from: ps->p)
7768     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7769     //
7770     // map(to: ps->p[:22])
7771     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7772     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7773     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7774     //
7775     // map(ps->ps)
7776     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7777     //
7778     // map(from: ps->ps->s.i)
7779     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7780     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7781     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7782     //
7783     // map(from: ps->ps->ps)
7784     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7785     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7786     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7787     //
7788     // map(ps->ps->ps->ps)
7789     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7790     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7791     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7792     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7793     //
7794     // map(to: ps->ps->ps->s.f[:22])
7795     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7796     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7797     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7798     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7799     //
7800     // map(to: s.f[:22]) map(from: s.p[:33])
7801     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
7803     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7804     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7805     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7806     // (*) allocate contiguous space needed to fit all mapped members even if
7807     //     we allocate space for members not mapped (in this example,
7808     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7809     //     them as well because they fall between &s.f[0] and &s.p)
7810     //
7811     // map(from: s.f[:22]) map(to: ps->p[:33])
7812     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7814     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7815     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7816     // (*) the struct this entry pertains to is the 2nd element in the list of
7817     //     arguments, hence MEMBER_OF(2)
7818     //
7819     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7820     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7821     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7822     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7824     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7825     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7826     // (*) the struct this entry pertains to is the 4th element in the list
7827     //     of arguments, hence MEMBER_OF(4)
7828 
7829     // Track if the map information being generated is the first for a capture.
7830     bool IsCaptureFirstInfo = IsFirstComponentList;
7831     // When the variable is on a declare target link or in a to clause with
7832     // unified memory, a reference is needed to hold the host/device address
7833     // of the variable.
7834     bool RequiresReference = false;
7835 
7836     // Scan the components from the base to the complete expression.
7837     auto CI = Components.rbegin();
7838     auto CE = Components.rend();
7839     auto I = CI;
7840 
7841     // Track if the map information being generated is the first for a list of
7842     // components.
7843     bool IsExpressionFirstInfo = true;
7844     bool FirstPointerInComplexData = false;
7845     Address BP = Address::invalid();
7846     const Expr *AssocExpr = I->getAssociatedExpression();
7847     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7848     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7849     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7850 
7851     if (isa<MemberExpr>(AssocExpr)) {
7852       // The base is the 'this' pointer. The content of the pointer is going
7853       // to be the base of the field being mapped.
7854       BP = CGF.LoadCXXThisAddress();
7855     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7856                (OASE &&
7857                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7858       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7859     } else if (OAShE &&
7860                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7861       BP = Address(
7862           CGF.EmitScalarExpr(OAShE->getBase()),
7863           CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7864           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7865     } else {
7866       // The base is the reference to the variable.
7867       // BP = &Var.
7868       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7869       if (const auto *VD =
7870               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7871         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7872                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7873           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7874               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7875                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7876             RequiresReference = true;
7877             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7878           }
7879         }
7880       }
7881 
7882       // If the variable is a pointer and is being dereferenced (i.e. is not
7883       // the last component), the base has to be the pointer itself, not its
7884       // reference. References are ignored for mapping purposes.
7885       QualType Ty =
7886           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7887       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7888         // No need to generate individual map information for the pointer, it
7889         // can be associated with the combined storage if shared memory mode is
7890         // active or the base declaration is not global variable.
7891         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7892         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7893             !VD || VD->hasLocalStorage())
7894           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7895         else
7896           FirstPointerInComplexData = true;
7897         ++I;
7898       }
7899     }
7900 
7901     // Track whether a component of the list should be marked as MEMBER_OF some
7902     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7903     // in a component list should be marked as MEMBER_OF, all subsequent entries
7904     // do not belong to the base struct. E.g.
7905     // struct S2 s;
7906     // s.ps->ps->ps->f[:]
7907     //   (1) (2) (3) (4)
7908     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7909     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7910     // is the pointee of ps(2) which is not member of struct s, so it should not
7911     // be marked as such (it is still PTR_AND_OBJ).
7912     // The variable is initialized to false so that PTR_AND_OBJ entries which
7913     // are not struct members are not considered (e.g. array of pointers to
7914     // data).
7915     bool ShouldBeMemberOf = false;
7916 
7917     // Variable keeping track of whether or not we have encountered a component
7918     // in the component list which is a member expression. Useful when we have a
7919     // pointer or a final array section, in which case it is the previous
7920     // component in the list which tells us whether we have a member expression.
7921     // E.g. X.f[:]
7922     // While processing the final array section "[:]" it is "f" which tells us
7923     // whether we are dealing with a member of a declared struct.
7924     const MemberExpr *EncounteredME = nullptr;
7925 
    // Track the total number of dimensions. Start from one for the dummy
    // dimension.
7928     uint64_t DimSize = 1;
7929 
7930     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7931     bool IsPrevMemberReference = false;
7932 
7933     for (; I != CE; ++I) {
7934       // If the current component is member of a struct (parent struct) mark it.
7935       if (!EncounteredME) {
7936         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7937         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7938         // as MEMBER_OF the parent struct.
7939         if (EncounteredME) {
7940           ShouldBeMemberOf = true;
7941           // Do not emit as complex pointer if this is actually not array-like
7942           // expression.
7943           if (FirstPointerInComplexData) {
7944             QualType Ty = std::prev(I)
7945                               ->getAssociatedDeclaration()
7946                               ->getType()
7947                               .getNonReferenceType();
7948             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7949             FirstPointerInComplexData = false;
7950           }
7951         }
7952       }
7953 
7954       auto Next = std::next(I);
7955 
7956       // We need to generate the addresses and sizes if this is the last
7957       // component, if the component is a pointer or if it is an array section
7958       // whose length can't be proved to be one. If this is a pointer, it
7959       // becomes the base address for the following components.
7960 
7961       // A final array section, is one whose length can't be proved to be one.
7962       // If the map item is non-contiguous then we don't treat any array section
7963       // as final array section.
7964       bool IsFinalArraySection =
7965           !IsNonContiguous &&
7966           isFinalArraySectionExpression(I->getAssociatedExpression());
7967 
7968       // If we have a declaration for the mapping use that, otherwise use
7969       // the base declaration of the map clause.
7970       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7971                                      ? I->getAssociatedDeclaration()
7972                                      : BaseDecl;
7973       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7974                                                : MapExpr;
7975 
7976       // Get information on whether the element is a pointer. Have to do a
7977       // special treatment for array sections given that they are built-in
7978       // types.
7979       const auto *OASE =
7980           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7981       const auto *OAShE =
7982           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7983       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7984       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7985       bool IsPointer =
7986           OAShE ||
7987           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7988                        .getCanonicalType()
7989                        ->isAnyPointerType()) ||
7990           I->getAssociatedExpression()->getType()->isAnyPointerType();
7991       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7992                                MapDecl &&
7993                                MapDecl->getType()->isLValueReferenceType();
7994       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
7995 
7996       if (OASE)
7997         ++DimSize;
7998 
7999       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
8000           IsFinalArraySection) {
8001         // If this is not the last component, we expect the pointer to be
8002         // associated with an array expression or member expression.
8003         assert((Next == CE ||
8004                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
8005                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
8006                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
8007                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
8008                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
8009                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
8010                "Unexpected expression");
8011 
8012         Address LB = Address::invalid();
8013         Address LowestElem = Address::invalid();
8014         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
8015                                        const MemberExpr *E) {
8016           const Expr *BaseExpr = E->getBase();
8017           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
8018           // scalar.
8019           LValue BaseLV;
8020           if (E->isArrow()) {
8021             LValueBaseInfo BaseInfo;
8022             TBAAAccessInfo TBAAInfo;
8023             Address Addr =
8024                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
8025             QualType PtrTy = BaseExpr->getType()->getPointeeType();
8026             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
8027           } else {
8028             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
8029           }
8030           return BaseLV;
8031         };
8032         if (OAShE) {
8033           LowestElem = LB =
8034               Address(CGF.EmitScalarExpr(OAShE->getBase()),
8035                       CGF.ConvertTypeForMem(
8036                           OAShE->getBase()->getType()->getPointeeType()),
8037                       CGF.getContext().getTypeAlignInChars(
8038                           OAShE->getBase()->getType()));
8039         } else if (IsMemberReference) {
8040           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
8041           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8042           LowestElem = CGF.EmitLValueForFieldInitialization(
8043                               BaseLVal, cast<FieldDecl>(MapDecl))
8044                            .getAddress(CGF);
8045           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
8046                    .getAddress(CGF);
8047         } else {
8048           LowestElem = LB =
8049               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
8050                   .getAddress(CGF);
8051         }
8052 
8053         // If this component is a pointer inside the base struct then we don't
8054         // need to create any entry for it - it will be combined with the object
8055         // it is pointing to into a single PTR_AND_OBJ entry.
8056         bool IsMemberPointerOrAddr =
8057             EncounteredME &&
8058             (((IsPointer || ForDeviceAddr) &&
8059               I->getAssociatedExpression() == EncounteredME) ||
8060              (IsPrevMemberReference && !IsPointer) ||
8061              (IsMemberReference && Next != CE &&
8062               !Next->getAssociatedExpression()->getType()->isPointerType()));
8063         if (!OverlappedElements.empty() && Next == CE) {
8064           // Handle base element with the info for overlapped elements.
8065           assert(!PartialStruct.Base.isValid() && "The base element is set.");
8066           assert(!IsPointer &&
8067                  "Unexpected base element with the pointer type.");
8068           // Mark the whole struct as the struct that requires allocation on the
8069           // device.
8070           PartialStruct.LowestElem = {0, LowestElem};
8071           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8072               I->getAssociatedExpression()->getType());
8073           Address HB = CGF.Builder.CreateConstGEP(
8074               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8075                   LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
8076               TypeSize.getQuantity() - 1);
8077           PartialStruct.HighestElem = {
8078               std::numeric_limits<decltype(
8079                   PartialStruct.HighestElem.first)>::max(),
8080               HB};
8081           PartialStruct.Base = BP;
8082           PartialStruct.LB = LB;
8083           assert(
8084               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8085               "Overlapped elements must be used only once for the variable.");
8086           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8087           // Emit data for non-overlapped data.
8088           OpenMPOffloadMappingFlags Flags =
8089               OMP_MAP_MEMBER_OF |
8090               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8091                              /*AddPtrFlag=*/false,
8092                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8093           llvm::Value *Size = nullptr;
8094           // Do bitcopy of all non-overlapped structure elements.
8095           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8096                    Component : OverlappedElements) {
8097             Address ComponentLB = Address::invalid();
8098             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8099                  Component) {
8100               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8101                 const auto *FD = dyn_cast<FieldDecl>(VD);
8102                 if (FD && FD->getType()->isLValueReferenceType()) {
8103                   const auto *ME =
8104                       cast<MemberExpr>(MC.getAssociatedExpression());
8105                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8106                   ComponentLB =
8107                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
8108                           .getAddress(CGF);
8109                 } else {
8110                   ComponentLB =
8111                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
8112                           .getAddress(CGF);
8113                 }
8114                 Size = CGF.Builder.CreatePtrDiff(
8115                     CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
8116                     CGF.EmitCastToVoidPtr(LB.getPointer()));
8117                 break;
8118               }
8119             }
8120             assert(Size && "Failed to determine structure size");
8121             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8122             CombinedInfo.BasePointers.push_back(BP.getPointer());
8123             CombinedInfo.Pointers.push_back(LB.getPointer());
8124             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8125                 Size, CGF.Int64Ty, /*isSigned=*/true));
8126             CombinedInfo.Types.push_back(Flags);
8127             CombinedInfo.Mappers.push_back(nullptr);
8128             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8129                                                                       : 1);
8130             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
8131           }
8132           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8133           CombinedInfo.BasePointers.push_back(BP.getPointer());
8134           CombinedInfo.Pointers.push_back(LB.getPointer());
8135           Size = CGF.Builder.CreatePtrDiff(
8136               CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
8137               CGF.EmitCastToVoidPtr(LB.getPointer()));
8138           CombinedInfo.Sizes.push_back(
8139               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8140           CombinedInfo.Types.push_back(Flags);
8141           CombinedInfo.Mappers.push_back(nullptr);
8142           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8143                                                                     : 1);
8144           break;
8145         }
8146         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8147         if (!IsMemberPointerOrAddr ||
8148             (Next == CE && MapType != OMPC_MAP_unknown)) {
8149           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8150           CombinedInfo.BasePointers.push_back(BP.getPointer());
8151           CombinedInfo.Pointers.push_back(LB.getPointer());
8152           CombinedInfo.Sizes.push_back(
8153               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8154           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8155                                                                     : 1);
8156 
8157           // If Mapper is valid, the last component inherits the mapper.
8158           bool HasMapper = Mapper && Next == CE;
8159           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8160 
8161           // We need to add a pointer flag for each map that comes from the
8162           // same expression except for the first one. We also need to signal
8163           // this map is the first one that relates with the current capture
8164           // (there is a set of entries for each capture).
8165           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8166               MapType, MapModifiers, MotionModifiers, IsImplicit,
8167               !IsExpressionFirstInfo || RequiresReference ||
8168                   FirstPointerInComplexData || IsMemberReference,
8169               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8170 
8171           if (!IsExpressionFirstInfo || IsMemberReference) {
8172             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8173             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8174             if (IsPointer || (IsMemberReference && Next != CE))
8175               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
8176                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
8177 
8178             if (ShouldBeMemberOf) {
8179               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8180               // should be later updated with the correct value of MEMBER_OF.
8181               Flags |= OMP_MAP_MEMBER_OF;
8182               // From now on, all subsequent PTR_AND_OBJ entries should not be
8183               // marked as MEMBER_OF.
8184               ShouldBeMemberOf = false;
8185             }
8186           }
8187 
8188           CombinedInfo.Types.push_back(Flags);
8189         }
8190 
8191         // If we have encountered a member expression so far, keep track of the
8192         // mapped member. If the parent is "*this", then the value declaration
8193         // is nullptr.
8194         if (EncounteredME) {
8195           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8196           unsigned FieldIndex = FD->getFieldIndex();
8197 
8198           // Update info about the lowest and highest elements for this struct
8199           if (!PartialStruct.Base.isValid()) {
8200             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8201             if (IsFinalArraySection) {
8202               Address HB =
8203                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
8204                       .getAddress(CGF);
8205               PartialStruct.HighestElem = {FieldIndex, HB};
8206             } else {
8207               PartialStruct.HighestElem = {FieldIndex, LowestElem};
8208             }
8209             PartialStruct.Base = BP;
8210             PartialStruct.LB = BP;
8211           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8212             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8213           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8214             PartialStruct.HighestElem = {FieldIndex, LowestElem};
8215           }
8216         }
8217 
8218         // Need to emit combined struct for array sections.
8219         if (IsFinalArraySection || IsNonContiguous)
8220           PartialStruct.IsArraySection = true;
8221 
8222         // If we have a final array section, we are done with this expression.
8223         if (IsFinalArraySection)
8224           break;
8225 
8226         // The pointer becomes the base for the next element.
8227         if (Next != CE)
8228           BP = IsMemberReference ? LowestElem : LB;
8229 
8230         IsExpressionFirstInfo = false;
8231         IsCaptureFirstInfo = false;
8232         FirstPointerInComplexData = false;
8233         IsPrevMemberReference = IsMemberReference;
8234       } else if (FirstPointerInComplexData) {
8235         QualType Ty = Components.rbegin()
8236                           ->getAssociatedDeclaration()
8237                           ->getType()
8238                           .getNonReferenceType();
8239         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8240         FirstPointerInComplexData = false;
8241       }
8242     }
    // If we ran into the whole component - allocate the space for the whole
    // record.
8245     if (!EncounteredME)
8246       PartialStruct.HasCompleteRecord = true;
8247 
8248     if (!IsNonContiguous)
8249       return;
8250 
8251     const ASTContext &Context = CGF.getContext();
8252 
8253     // For supporting stride in array section, we need to initialize the first
8254     // dimension size as 1, first offset as 0, and first count as 1
8255     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8256     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8257     MapValuesArrayTy CurStrides;
8258     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8259     uint64_t ElementTypeSize;
8260 
8261     // Collect Size information for each dimension and get the element size as
8262     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
8264     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8265          Components) {
8266       const Expr *AssocExpr = Component.getAssociatedExpression();
8267       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8268 
8269       if (!OASE)
8270         continue;
8271 
8272       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8273       auto *CAT = Context.getAsConstantArrayType(Ty);
8274       auto *VAT = Context.getAsVariableArrayType(Ty);
8275 
8276       // We need all the dimension size except for the last dimension.
8277       assert((VAT || CAT || &Component == &*Components.begin()) &&
8278              "Should be either ConstantArray or VariableArray if not the "
8279              "first Component");
8280 
8281       // Get element size if CurStrides is empty.
8282       if (CurStrides.empty()) {
8283         const Type *ElementType = nullptr;
8284         if (CAT)
8285           ElementType = CAT->getElementType().getTypePtr();
8286         else if (VAT)
8287           ElementType = VAT->getElementType().getTypePtr();
8288         else
8289           assert(&Component == &*Components.begin() &&
8290                  "Only expect pointer (non CAT or VAT) when this is the "
8291                  "first Component");
8292         // If ElementType is null, then it means the base is a pointer
8293         // (neither CAT nor VAT) and we'll attempt to get ElementType again
8294         // for next iteration.
8295         if (ElementType) {
8296           // For the case that having pointer as base, we need to remove one
8297           // level of indirection.
8298           if (&Component != &*Components.begin())
8299             ElementType = ElementType->getPointeeOrArrayElementType();
8300           ElementTypeSize =
8301               Context.getTypeSizeInChars(ElementType).getQuantity();
8302           CurStrides.push_back(
8303               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8304         }
8305       }
8306       // Get dimension value except for the last dimension since we don't need
8307       // it.
8308       if (DimSizes.size() < Components.size() - 1) {
8309         if (CAT)
8310           DimSizes.push_back(llvm::ConstantInt::get(
8311               CGF.Int64Ty, CAT->getSize().getZExtValue()));
8312         else if (VAT)
8313           DimSizes.push_back(CGF.Builder.CreateIntCast(
8314               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8315               /*IsSigned=*/false));
8316       }
8317     }
8318 
    // Skip the dummy dimension since we already have its information.
8320     auto *DI = DimSizes.begin() + 1;
8321     // Product of dimension.
8322     llvm::Value *DimProd =
8323         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8324 
8325     // Collect info for non-contiguous. Notice that offset, count, and stride
8326     // are only meaningful for array-section, so we insert a null for anything
8327     // other than array-section.
8328     // Also, the size of offset, count, and stride are not the same as
8329     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8330     // count, and stride are the same as the number of non-contiguous
8331     // declaration in target update to/from clause.
8332     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8333          Components) {
8334       const Expr *AssocExpr = Component.getAssociatedExpression();
8335 
8336       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8337         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8338             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8339             /*isSigned=*/false);
8340         CurOffsets.push_back(Offset);
8341         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8342         CurStrides.push_back(CurStrides.back());
8343         continue;
8344       }
8345 
8346       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8347 
8348       if (!OASE)
8349         continue;
8350 
8351       // Offset
8352       const Expr *OffsetExpr = OASE->getLowerBound();
8353       llvm::Value *Offset = nullptr;
8354       if (!OffsetExpr) {
8355         // If offset is absent, then we just set it to zero.
8356         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8357       } else {
8358         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8359                                            CGF.Int64Ty,
8360                                            /*isSigned=*/false);
8361       }
8362       CurOffsets.push_back(Offset);
8363 
8364       // Count
8365       const Expr *CountExpr = OASE->getLength();
8366       llvm::Value *Count = nullptr;
8367       if (!CountExpr) {
8368         // In Clang, once a high dimension is an array section, we construct all
8369         // the lower dimension as array section, however, for case like
8370         // arr[0:2][2], Clang construct the inner dimension as an array section
8371         // but it actually is not in an array section form according to spec.
8372         if (!OASE->getColonLocFirst().isValid() &&
8373             !OASE->getColonLocSecond().isValid()) {
8374           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8375         } else {
8376           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8377           // When the length is absent it defaults to ⌈(size −
8378           // lower-bound)/stride⌉, where size is the size of the array
8379           // dimension.
8380           const Expr *StrideExpr = OASE->getStride();
8381           llvm::Value *Stride =
8382               StrideExpr
8383                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8384                                               CGF.Int64Ty, /*isSigned=*/false)
8385                   : nullptr;
8386           if (Stride)
8387             Count = CGF.Builder.CreateUDiv(
8388                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8389           else
8390             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8391         }
8392       } else {
8393         Count = CGF.EmitScalarExpr(CountExpr);
8394       }
8395       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8396       CurCounts.push_back(Count);
8397 
8398       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8399       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8400       //              Offset      Count     Stride
8401       //    D0          0           1         4    (int)    <- dummy dimension
8402       //    D1          0           2         8    (2 * (1) * 4)
8403       //    D2          1           2         20   (1 * (1 * 5) * 4)
8404       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8405       const Expr *StrideExpr = OASE->getStride();
8406       llvm::Value *Stride =
8407           StrideExpr
8408               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8409                                           CGF.Int64Ty, /*isSigned=*/false)
8410               : nullptr;
8411       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8412       if (Stride)
8413         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8414       else
8415         CurStrides.push_back(DimProd);
8416       if (DI != DimSizes.end())
8417         ++DI;
8418     }
8419 
8420     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8421     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8422     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8423   }
8424 
8425   /// Return the adjusted map modifiers if the declaration a capture refers to
8426   /// appears in a first-private clause. This is expected to be used only with
8427   /// directives that start with 'target'.
8428   MappableExprsHandler::OpenMPOffloadMappingFlags
8429   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8430     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8431 
8432     // A first private variable captured by reference will use only the
8433     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8434     // declaration is known as first-private in this handler.
8435     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8436       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8437         return MappableExprsHandler::OMP_MAP_TO |
8438                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8439       return MappableExprsHandler::OMP_MAP_PRIVATE |
8440              MappableExprsHandler::OMP_MAP_TO;
8441     }
8442     auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8443     if (I != LambdasMap.end())
8444       // for map(to: lambda): using user specified map type.
8445       return getMapTypeBits(
8446           I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8447           /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
8448           /*AddPtrFlag=*/false,
8449           /*AddIsTargetParamFlag=*/false,
8450           /*isNonContiguous=*/false);
8451     return MappableExprsHandler::OMP_MAP_TO |
8452            MappableExprsHandler::OMP_MAP_FROM;
8453   }
8454 
8455   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8456     // Rotate by getFlagMemberOffset() bits.
8457     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8458                                                   << getFlagMemberOffset());
8459   }
8460 
8461   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8462                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8463     // If the entry is PTR_AND_OBJ but has not been marked with the special
8464     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8465     // marked as MEMBER_OF.
8466     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8467         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8468       return;
8469 
8470     // Reset the placeholder value to prepare the flag for the assignment of the
8471     // proper MEMBER_OF value.
8472     Flags &= ~OMP_MAP_MEMBER_OF;
8473     Flags |= MemberOfFlag;
8474   }
8475 
8476   void getPlainLayout(const CXXRecordDecl *RD,
8477                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8478                       bool AsBase) const {
8479     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8480 
8481     llvm::StructType *St =
8482         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8483 
8484     unsigned NumElements = St->getNumElements();
8485     llvm::SmallVector<
8486         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8487         RecordLayout(NumElements);
8488 
8489     // Fill bases.
8490     for (const auto &I : RD->bases()) {
8491       if (I.isVirtual())
8492         continue;
8493       const auto *Base = I.getType()->getAsCXXRecordDecl();
8494       // Ignore empty bases.
8495       if (Base->isEmpty() || CGF.getContext()
8496                                  .getASTRecordLayout(Base)
8497                                  .getNonVirtualSize()
8498                                  .isZero())
8499         continue;
8500 
8501       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8502       RecordLayout[FieldIndex] = Base;
8503     }
8504     // Fill in virtual bases.
8505     for (const auto &I : RD->vbases()) {
8506       const auto *Base = I.getType()->getAsCXXRecordDecl();
8507       // Ignore empty bases.
8508       if (Base->isEmpty())
8509         continue;
8510       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8511       if (RecordLayout[FieldIndex])
8512         continue;
8513       RecordLayout[FieldIndex] = Base;
8514     }
8515     // Fill in all the fields.
8516     assert(!RD->isUnion() && "Unexpected union.");
8517     for (const auto *Field : RD->fields()) {
8518       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8519       // will fill in later.)
8520       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8521         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8522         RecordLayout[FieldIndex] = Field;
8523       }
8524     }
8525     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8526              &Data : RecordLayout) {
8527       if (Data.isNull())
8528         continue;
8529       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8530         getPlainLayout(Base, Layout, /*AsBase=*/true);
8531       else
8532         Layout.push_back(Data.get<const FieldDecl *>());
8533     }
8534   }
8535 
8536   /// Generate all the base pointers, section pointers, sizes, map types, and
8537   /// mappers for the extracted mappable expressions (all included in \a
8538   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8539   /// pair of the relevant declaration and index where it occurs is appended to
8540   /// the device pointers info array.
8541   void generateAllInfoForClauses(
8542       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8543       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8544           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8545     // We have to process the component lists that relate with the same
8546     // declaration in a single chunk so that we can generate the map flags
8547     // correctly. Therefore, we organize all lists in a map.
8548     enum MapKind { Present, Allocs, Other, Total };
8549     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8550                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8551         Info;
8552 
8553     // Helper function to fill the information map for the different supported
8554     // clauses.
8555     auto &&InfoGen =
8556         [&Info, &SkipVarSet](
8557             const ValueDecl *D, MapKind Kind,
8558             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8559             OpenMPMapClauseKind MapType,
8560             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8561             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8562             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8563             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8564           if (SkipVarSet.contains(D))
8565             return;
8566           auto It = Info.find(D);
8567           if (It == Info.end())
8568             It = Info
8569                      .insert(std::make_pair(
8570                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8571                      .first;
8572           It->second[Kind].emplace_back(
8573               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8574               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8575         };
8576 
8577     for (const auto *Cl : Clauses) {
8578       const auto *C = dyn_cast<OMPMapClause>(Cl);
8579       if (!C)
8580         continue;
8581       MapKind Kind = Other;
8582       if (llvm::is_contained(C->getMapTypeModifiers(),
8583                              OMPC_MAP_MODIFIER_present))
8584         Kind = Present;
8585       else if (C->getMapType() == OMPC_MAP_alloc)
8586         Kind = Allocs;
8587       const auto *EI = C->getVarRefs().begin();
8588       for (const auto L : C->component_lists()) {
8589         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8590         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8591                 C->getMapTypeModifiers(), llvm::None,
8592                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8593                 E);
8594         ++EI;
8595       }
8596     }
8597     for (const auto *Cl : Clauses) {
8598       const auto *C = dyn_cast<OMPToClause>(Cl);
8599       if (!C)
8600         continue;
8601       MapKind Kind = Other;
8602       if (llvm::is_contained(C->getMotionModifiers(),
8603                              OMPC_MOTION_MODIFIER_present))
8604         Kind = Present;
8605       const auto *EI = C->getVarRefs().begin();
8606       for (const auto L : C->component_lists()) {
8607         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8608                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8609                 C->isImplicit(), std::get<2>(L), *EI);
8610         ++EI;
8611       }
8612     }
8613     for (const auto *Cl : Clauses) {
8614       const auto *C = dyn_cast<OMPFromClause>(Cl);
8615       if (!C)
8616         continue;
8617       MapKind Kind = Other;
8618       if (llvm::is_contained(C->getMotionModifiers(),
8619                              OMPC_MOTION_MODIFIER_present))
8620         Kind = Present;
8621       const auto *EI = C->getVarRefs().begin();
8622       for (const auto L : C->component_lists()) {
8623         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8624                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8625                 C->isImplicit(), std::get<2>(L), *EI);
8626         ++EI;
8627       }
8628     }
8629 
8630     // Look at the use_device_ptr clause information and mark the existing map
8631     // entries as such. If there is no map information for an entry in the
8632     // use_device_ptr list, we create one with map type 'alloc' and zero size
8633     // section. It is the user fault if that was not mapped before. If there is
8634     // no map information and the pointer is a struct member, then we defer the
8635     // emission of that entry until the whole struct has been processed.
8636     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8637                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8638         DeferredInfo;
8639     MapCombinedInfoTy UseDevicePtrCombinedInfo;
8640 
8641     for (const auto *Cl : Clauses) {
8642       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8643       if (!C)
8644         continue;
8645       for (const auto L : C->component_lists()) {
8646         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8647             std::get<1>(L);
8648         assert(!Components.empty() &&
8649                "Not expecting empty list of components!");
8650         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8651         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8652         const Expr *IE = Components.back().getAssociatedExpression();
8653         // If the first component is a member expression, we have to look into
8654         // 'this', which maps to null in the map of map information. Otherwise
8655         // look directly for the information.
8656         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8657 
8658         // We potentially have map information for this declaration already.
8659         // Look for the first set of components that refer to it.
8660         if (It != Info.end()) {
8661           bool Found = false;
8662           for (auto &Data : It->second) {
8663             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8664               return MI.Components.back().getAssociatedDeclaration() == VD;
8665             });
8666             // If we found a map entry, signal that the pointer has to be
8667             // returned and move on to the next declaration. Exclude cases where
8668             // the base pointer is mapped as array subscript, array section or
8669             // array shaping. The base address is passed as a pointer to base in
8670             // this case and cannot be used as a base for use_device_ptr list
8671             // item.
8672             if (CI != Data.end()) {
8673               auto PrevCI = std::next(CI->Components.rbegin());
8674               const auto *VarD = dyn_cast<VarDecl>(VD);
8675               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8676                   isa<MemberExpr>(IE) ||
8677                   !VD->getType().getNonReferenceType()->isPointerType() ||
8678                   PrevCI == CI->Components.rend() ||
8679                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8680                   VarD->hasLocalStorage()) {
8681                 CI->ReturnDevicePointer = true;
8682                 Found = true;
8683                 break;
8684               }
8685             }
8686           }
8687           if (Found)
8688             continue;
8689         }
8690 
8691         // We didn't find any match in our map information - generate a zero
8692         // size array section - if the pointer is a struct member we defer this
8693         // action until the whole struct has been processed.
8694         if (isa<MemberExpr>(IE)) {
8695           // Insert the pointer into Info to be processed by
8696           // generateInfoForComponentList. Because it is a member pointer
8697           // without a pointee, no entry will be generated for it, therefore
8698           // we need to generate one after the whole struct has been processed.
8699           // Nonetheless, generateInfoForComponentList must be called to take
8700           // the pointer into account for the calculation of the range of the
8701           // partial struct.
8702           InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8703                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8704                   nullptr);
8705           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8706         } else {
8707           llvm::Value *Ptr =
8708               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8709           UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8710           UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8711           UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8712           UseDevicePtrCombinedInfo.Sizes.push_back(
8713               llvm::Constant::getNullValue(CGF.Int64Ty));
8714           UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8715           UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8716         }
8717       }
8718     }
8719 
8720     // Look at the use_device_addr clause information and mark the existing map
8721     // entries as such. If there is no map information for an entry in the
8722     // use_device_addr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
8726     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8727     for (const auto *Cl : Clauses) {
8728       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8729       if (!C)
8730         continue;
8731       for (const auto L : C->component_lists()) {
8732         assert(!std::get<1>(L).empty() &&
8733                "Not expecting empty list of components!");
8734         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8735         if (!Processed.insert(VD).second)
8736           continue;
8737         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8738         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8739         // If the first component is a member expression, we have to look into
8740         // 'this', which maps to null in the map of map information. Otherwise
8741         // look directly for the information.
8742         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8743 
8744         // We potentially have map information for this declaration already.
8745         // Look for the first set of components that refer to it.
8746         if (It != Info.end()) {
8747           bool Found = false;
8748           for (auto &Data : It->second) {
8749             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8750               return MI.Components.back().getAssociatedDeclaration() == VD;
8751             });
8752             // If we found a map entry, signal that the pointer has to be
8753             // returned and move on to the next declaration.
8754             if (CI != Data.end()) {
8755               CI->ReturnDevicePointer = true;
8756               Found = true;
8757               break;
8758             }
8759           }
8760           if (Found)
8761             continue;
8762         }
8763 
8764         // We didn't find any match in our map information - generate a zero
8765         // size array section - if the pointer is a struct member we defer this
8766         // action until the whole struct has been processed.
8767         if (isa<MemberExpr>(IE)) {
8768           // Insert the pointer into Info to be processed by
8769           // generateInfoForComponentList. Because it is a member pointer
8770           // without a pointee, no entry will be generated for it, therefore
8771           // we need to generate one after the whole struct has been processed.
8772           // Nonetheless, generateInfoForComponentList must be called to take
8773           // the pointer into account for the calculation of the range of the
8774           // partial struct.
8775           InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8776                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8777                   nullptr, nullptr, /*ForDeviceAddr=*/true);
8778           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8779         } else {
8780           llvm::Value *Ptr;
8781           if (IE->isGLValue())
8782             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8783           else
8784             Ptr = CGF.EmitScalarExpr(IE);
8785           CombinedInfo.Exprs.push_back(VD);
8786           CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8787           CombinedInfo.Pointers.push_back(Ptr);
8788           CombinedInfo.Sizes.push_back(
8789               llvm::Constant::getNullValue(CGF.Int64Ty));
8790           CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8791           CombinedInfo.Mappers.push_back(nullptr);
8792         }
8793       }
8794     }
8795 
8796     for (const auto &Data : Info) {
8797       StructRangeInfoTy PartialStruct;
8798       // Temporary generated information.
8799       MapCombinedInfoTy CurInfo;
8800       const Decl *D = Data.first;
8801       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8802       for (const auto &M : Data.second) {
8803         for (const MapInfo &L : M) {
8804           assert(!L.Components.empty() &&
8805                  "Not expecting declaration with no component lists.");
8806 
8807           // Remember the current base pointer index.
8808           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8809           CurInfo.NonContigInfo.IsNonContiguous =
8810               L.Components.back().isNonContiguous();
8811           generateInfoForComponentList(
8812               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8813               CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8814               L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8815 
8816           // If this entry relates with a device pointer, set the relevant
8817           // declaration and add the 'return pointer' flag.
8818           if (L.ReturnDevicePointer) {
8819             assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8820                    "Unexpected number of mapped base pointers.");
8821 
8822             const ValueDecl *RelevantVD =
8823                 L.Components.back().getAssociatedDeclaration();
8824             assert(RelevantVD &&
8825                    "No relevant declaration related with device pointer??");
8826 
8827             CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8828                 RelevantVD);
8829             CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8830           }
8831         }
8832       }
8833 
8834       // Append any pending zero-length pointers which are struct members and
8835       // used with use_device_ptr or use_device_addr.
8836       auto CI = DeferredInfo.find(Data.first);
8837       if (CI != DeferredInfo.end()) {
8838         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8839           llvm::Value *BasePtr;
8840           llvm::Value *Ptr;
8841           if (L.ForDeviceAddr) {
8842             if (L.IE->isGLValue())
8843               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8844             else
8845               Ptr = this->CGF.EmitScalarExpr(L.IE);
8846             BasePtr = Ptr;
8847             // Entry is RETURN_PARAM. Also, set the placeholder value
8848             // MEMBER_OF=FFFF so that the entry is later updated with the
8849             // correct value of MEMBER_OF.
8850             CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8851           } else {
8852             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8853             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8854                                              L.IE->getExprLoc());
8855             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8856             // placeholder value MEMBER_OF=FFFF so that the entry is later
8857             // updated with the correct value of MEMBER_OF.
8858             CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8859                                     OMP_MAP_MEMBER_OF);
8860           }
8861           CurInfo.Exprs.push_back(L.VD);
8862           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8863           CurInfo.Pointers.push_back(Ptr);
8864           CurInfo.Sizes.push_back(
8865               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8866           CurInfo.Mappers.push_back(nullptr);
8867         }
8868       }
8869       // If there is an entry in PartialStruct it means we have a struct with
8870       // individual members mapped. Emit an extra combined entry.
8871       if (PartialStruct.Base.isValid()) {
8872         CurInfo.NonContigInfo.Dims.push_back(0);
8873         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8874       }
8875 
8876       // We need to append the results of this capture to what we already
8877       // have.
8878       CombinedInfo.append(CurInfo);
8879     }
8880     // Append data for use_device_ptr clauses.
8881     CombinedInfo.append(UseDevicePtrCombinedInfo);
8882   }
8883 
8884 public:
8885   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8886       : CurDir(&Dir), CGF(CGF) {
8887     // Extract firstprivate clause information.
8888     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8889       for (const auto *D : C->varlists())
8890         FirstPrivateDecls.try_emplace(
8891             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8892     // Extract implicit firstprivates from uses_allocators clauses.
8893     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8894       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8895         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8896         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8897           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8898                                         /*Implicit=*/true);
8899         else if (const auto *VD = dyn_cast<VarDecl>(
8900                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8901                          ->getDecl()))
8902           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8903       }
8904     }
8905     // Extract device pointer clause information.
8906     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8907       for (auto L : C->component_lists())
8908         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8909     // Extract map information.
8910     for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8911       if (C->getMapType() != OMPC_MAP_to)
8912         continue;
8913       for (auto L : C->component_lists()) {
8914         const ValueDecl *VD = std::get<0>(L);
8915         const auto *RD = VD ? VD->getType()
8916                                   .getCanonicalType()
8917                                   .getNonReferenceType()
8918                                   ->getAsCXXRecordDecl()
8919                             : nullptr;
8920         if (RD && RD->isLambda())
8921           LambdasMap.try_emplace(std::get<0>(L), C);
8922       }
8923     }
8924   }
8925 
8926   /// Constructor for the declare mapper directive.
8927   MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8928       : CurDir(&Dir), CGF(CGF) {}
8929 
8930   /// Generate code for the combined entry if we have a partially mapped struct
8931   /// and take care of the mapping flags of the arguments corresponding to
8932   /// individual struct members.
8933   void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8934                          MapFlagsArrayTy &CurTypes,
8935                          const StructRangeInfoTy &PartialStruct,
8936                          const ValueDecl *VD = nullptr,
8937                          bool NotTargetParams = true) const {
8938     if (CurTypes.size() == 1 &&
8939         ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
8940         !PartialStruct.IsArraySection)
8941       return;
8942     Address LBAddr = PartialStruct.LowestElem.second;
8943     Address HBAddr = PartialStruct.HighestElem.second;
8944     if (PartialStruct.HasCompleteRecord) {
8945       LBAddr = PartialStruct.LB;
8946       HBAddr = PartialStruct.LB;
8947     }
8948     CombinedInfo.Exprs.push_back(VD);
8949     // Base is the base of the struct
8950     CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8951     // Pointer is the address of the lowest element
8952     llvm::Value *LB = LBAddr.getPointer();
8953     CombinedInfo.Pointers.push_back(LB);
8954     // There should not be a mapper for a combined entry.
8955     CombinedInfo.Mappers.push_back(nullptr);
8956     // Size is (addr of {highest+1} element) - (addr of lowest element)
8957     llvm::Value *HB = HBAddr.getPointer();
8958     llvm::Value *HAddr =
8959         CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
8960     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8961     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8962     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8963     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8964                                                   /*isSigned=*/false);
8965     CombinedInfo.Sizes.push_back(Size);
8966     // Map type is always TARGET_PARAM, if generate info for captures.
8967     CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
8968                                                  : OMP_MAP_TARGET_PARAM);
8969     // If any element has the present modifier, then make sure the runtime
8970     // doesn't attempt to allocate the struct.
8971     if (CurTypes.end() !=
8972         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8973           return Type & OMP_MAP_PRESENT;
8974         }))
8975       CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
8976     // Remove TARGET_PARAM flag from the first element
8977     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
8978     // If any element has the ompx_hold modifier, then make sure the runtime
8979     // uses the hold reference count for the struct as a whole so that it won't
8980     // be unmapped by an extra dynamic reference count decrement.  Add it to all
8981     // elements as well so the runtime knows which reference count to check
8982     // when determining whether it's time for device-to-host transfers of
8983     // individual elements.
8984     if (CurTypes.end() !=
8985         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8986           return Type & OMP_MAP_OMPX_HOLD;
8987         })) {
8988       CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
8989       for (auto &M : CurTypes)
8990         M |= OMP_MAP_OMPX_HOLD;
8991     }
8992 
8993     // All other current entries will be MEMBER_OF the combined entry
8994     // (except for PTR_AND_OBJ entries which do not have a placeholder value
8995     // 0xFFFF in the MEMBER_OF field).
8996     OpenMPOffloadMappingFlags MemberOfFlag =
8997         getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8998     for (auto &M : CurTypes)
8999       setCorrectMemberOfFlag(M, MemberOfFlag);
9000   }
9001 
9002   /// Generate all the base pointers, section pointers, sizes, map types, and
9003   /// mappers for the extracted mappable expressions (all included in \a
9004   /// CombinedInfo). Also, for each item that relates with a device pointer, a
9005   /// pair of the relevant declaration and index where it occurs is appended to
9006   /// the device pointers info array.
9007   void generateAllInfo(
9008       MapCombinedInfoTy &CombinedInfo,
9009       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9010           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9011     assert(CurDir.is<const OMPExecutableDirective *>() &&
9012            "Expect a executable directive");
9013     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9014     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
9015   }
9016 
9017   /// Generate all the base pointers, section pointers, sizes, map types, and
9018   /// mappers for the extracted map clauses of user-defined mapper (all included
9019   /// in \a CombinedInfo).
9020   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
9021     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
9022            "Expect a declare mapper directive");
9023     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
9024     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
9025   }
9026 
9027   /// Emit capture info for lambdas for variables captured by reference.
9028   void generateInfoForLambdaCaptures(
9029       const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9030       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
9031     QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
9032     const auto *RD = VDType->getAsCXXRecordDecl();
9033     if (!RD || !RD->isLambda())
9034       return;
9035     Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
9036                    CGF.getContext().getDeclAlign(VD));
9037     LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
9038     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
9039     FieldDecl *ThisCapture = nullptr;
9040     RD->getCaptureFields(Captures, ThisCapture);
9041     if (ThisCapture) {
9042       LValue ThisLVal =
9043           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
9044       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
9045       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
9046                                  VDLVal.getPointer(CGF));
9047       CombinedInfo.Exprs.push_back(VD);
9048       CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
9049       CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
9050       CombinedInfo.Sizes.push_back(
9051           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
9052                                     CGF.Int64Ty, /*isSigned=*/true));
9053       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9054                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9055       CombinedInfo.Mappers.push_back(nullptr);
9056     }
9057     for (const LambdaCapture &LC : RD->captures()) {
9058       if (!LC.capturesVariable())
9059         continue;
9060       const VarDecl *VD = LC.getCapturedVar();
9061       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
9062         continue;
9063       auto It = Captures.find(VD);
9064       assert(It != Captures.end() && "Found lambda capture without field.");
9065       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
9066       if (LC.getCaptureKind() == LCK_ByRef) {
9067         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
9068         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9069                                    VDLVal.getPointer(CGF));
9070         CombinedInfo.Exprs.push_back(VD);
9071         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9072         CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
9073         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9074             CGF.getTypeSize(
9075                 VD->getType().getCanonicalType().getNonReferenceType()),
9076             CGF.Int64Ty, /*isSigned=*/true));
9077       } else {
9078         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
9079         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9080                                    VDLVal.getPointer(CGF));
9081         CombinedInfo.Exprs.push_back(VD);
9082         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9083         CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
9084         CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
9085       }
9086       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9087                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9088       CombinedInfo.Mappers.push_back(nullptr);
9089     }
9090   }
9091 
9092   /// Set correct indices for lambdas captures.
9093   void adjustMemberOfForLambdaCaptures(
9094       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9095       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9096       MapFlagsArrayTy &Types) const {
9097     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9098       // Set correct member_of idx for all implicit lambda captures.
9099       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9100                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9101         continue;
9102       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9103       assert(BasePtr && "Unable to find base lambda address.");
9104       int TgtIdx = -1;
9105       for (unsigned J = I; J > 0; --J) {
9106         unsigned Idx = J - 1;
9107         if (Pointers[Idx] != BasePtr)
9108           continue;
9109         TgtIdx = Idx;
9110         break;
9111       }
9112       assert(TgtIdx != -1 && "Unable to find parent lambda.");
9113       // All other current entries will be MEMBER_OF the combined entry
9114       // (except for PTR_AND_OBJ entries which do not have a placeholder value
9115       // 0xFFFF in the MEMBER_OF field).
9116       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9117       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9118     }
9119   }
9120 
9121   /// Generate the base pointers, section pointers, sizes, map types, and
9122   /// mappers associated to a given capture (all included in \a CombinedInfo).
9123   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
9124                               llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9125                               StructRangeInfoTy &PartialStruct) const {
9126     assert(!Cap->capturesVariableArrayType() &&
9127            "Not expecting to generate map info for a variable array type!");
9128 
    // We need to know when we are generating information for the first
    // component
9130     const ValueDecl *VD = Cap->capturesThis()
9131                               ? nullptr
9132                               : Cap->getCapturedVar()->getCanonicalDecl();
9133 
9134     // for map(to: lambda): skip here, processing it in
9135     // generateDefaultMapInfo
9136     if (LambdasMap.count(VD))
9137       return;
9138 
9139     // If this declaration appears in a is_device_ptr clause we just have to
9140     // pass the pointer by value. If it is a reference to a declaration, we just
9141     // pass its value.
9142     if (DevPointersMap.count(VD)) {
9143       CombinedInfo.Exprs.push_back(VD);
9144       CombinedInfo.BasePointers.emplace_back(Arg, VD);
9145       CombinedInfo.Pointers.push_back(Arg);
9146       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9147           CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9148           /*isSigned=*/true));
9149       CombinedInfo.Types.push_back(
9150           (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
9151           OMP_MAP_TARGET_PARAM);
9152       CombinedInfo.Mappers.push_back(nullptr);
9153       return;
9154     }
9155 
9156     using MapData =
9157         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
9158                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
9159                    const ValueDecl *, const Expr *>;
9160     SmallVector<MapData, 4> DeclComponentLists;
9161     assert(CurDir.is<const OMPExecutableDirective *>() &&
9162            "Expect a executable directive");
9163     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9164     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9165       const auto *EI = C->getVarRefs().begin();
9166       for (const auto L : C->decl_component_lists(VD)) {
9167         const ValueDecl *VDecl, *Mapper;
9168         // The Expression is not correct if the mapping is implicit
9169         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9170         OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9171         std::tie(VDecl, Components, Mapper) = L;
9172         assert(VDecl == VD && "We got information for the wrong declaration??");
9173         assert(!Components.empty() &&
9174                "Not expecting declaration with no component lists.");
9175         DeclComponentLists.emplace_back(Components, C->getMapType(),
9176                                         C->getMapTypeModifiers(),
9177                                         C->isImplicit(), Mapper, E);
9178         ++EI;
9179       }
9180     }
9181     llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9182                                              const MapData &RHS) {
9183       ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9184       OpenMPMapClauseKind MapType = std::get<1>(RHS);
9185       bool HasPresent =
9186           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9187       bool HasAllocs = MapType == OMPC_MAP_alloc;
9188       MapModifiers = std::get<2>(RHS);
9189       MapType = std::get<1>(LHS);
9190       bool HasPresentR =
9191           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9192       bool HasAllocsR = MapType == OMPC_MAP_alloc;
9193       return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9194     });
9195 
9196     // Find overlapping elements (including the offset from the base element).
9197     llvm::SmallDenseMap<
9198         const MapData *,
9199         llvm::SmallVector<
9200             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9201         4>
9202         OverlappedData;
9203     size_t Count = 0;
9204     for (const MapData &L : DeclComponentLists) {
9205       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9206       OpenMPMapClauseKind MapType;
9207       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9208       bool IsImplicit;
9209       const ValueDecl *Mapper;
9210       const Expr *VarRef;
9211       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9212           L;
9213       ++Count;
9214       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
9215         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9216         std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9217                  VarRef) = L1;
9218         auto CI = Components.rbegin();
9219         auto CE = Components.rend();
9220         auto SI = Components1.rbegin();
9221         auto SE = Components1.rend();
9222         for (; CI != CE && SI != SE; ++CI, ++SI) {
9223           if (CI->getAssociatedExpression()->getStmtClass() !=
9224               SI->getAssociatedExpression()->getStmtClass())
9225             break;
9226           // Are we dealing with different variables/fields?
9227           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9228             break;
9229         }
9230         // Found overlapping if, at least for one component, reached the head
9231         // of the components list.
9232         if (CI == CE || SI == SE) {
9233           // Ignore it if it is the same component.
9234           if (CI == CE && SI == SE)
9235             continue;
9236           const auto It = (SI == SE) ? CI : SI;
9237           // If one component is a pointer and another one is a kind of
9238           // dereference of this pointer (array subscript, section, dereference,
9239           // etc.), it is not an overlapping.
9240           // Same, if one component is a base and another component is a
9241           // dereferenced pointer memberexpr with the same base.
9242           if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9243               (std::prev(It)->getAssociatedDeclaration() &&
9244                std::prev(It)
9245                    ->getAssociatedDeclaration()
9246                    ->getType()
9247                    ->isPointerType()) ||
9248               (It->getAssociatedDeclaration() &&
9249                It->getAssociatedDeclaration()->getType()->isPointerType() &&
9250                std::next(It) != CE && std::next(It) != SE))
9251             continue;
9252           const MapData &BaseData = CI == CE ? L : L1;
9253           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9254               SI == SE ? Components : Components1;
9255           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
9256           OverlappedElements.getSecond().push_back(SubData);
9257         }
9258       }
9259     }
9260     // Sort the overlapped elements for each item.
9261     llvm::SmallVector<const FieldDecl *, 4> Layout;
9262     if (!OverlappedData.empty()) {
9263       const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9264       const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9265       while (BaseType != OrigType) {
9266         BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9267         OrigType = BaseType->getPointeeOrArrayElementType();
9268       }
9269 
9270       if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9271         getPlainLayout(CRD, Layout, /*AsBase=*/false);
9272       else {
9273         const auto *RD = BaseType->getAsRecordDecl();
9274         Layout.append(RD->field_begin(), RD->field_end());
9275       }
9276     }
9277     for (auto &Pair : OverlappedData) {
9278       llvm::stable_sort(
9279           Pair.getSecond(),
9280           [&Layout](
9281               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9282               OMPClauseMappableExprCommon::MappableExprComponentListRef
9283                   Second) {
9284             auto CI = First.rbegin();
9285             auto CE = First.rend();
9286             auto SI = Second.rbegin();
9287             auto SE = Second.rend();
9288             for (; CI != CE && SI != SE; ++CI, ++SI) {
9289               if (CI->getAssociatedExpression()->getStmtClass() !=
9290                   SI->getAssociatedExpression()->getStmtClass())
9291                 break;
9292               // Are we dealing with different variables/fields?
9293               if (CI->getAssociatedDeclaration() !=
9294                   SI->getAssociatedDeclaration())
9295                 break;
9296             }
9297 
9298             // Lists contain the same elements.
9299             if (CI == CE && SI == SE)
9300               return false;
9301 
9302             // List with less elements is less than list with more elements.
9303             if (CI == CE || SI == SE)
9304               return CI == CE;
9305 
9306             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9307             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9308             if (FD1->getParent() == FD2->getParent())
9309               return FD1->getFieldIndex() < FD2->getFieldIndex();
9310             const auto *It =
9311                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9312                   return FD == FD1 || FD == FD2;
9313                 });
9314             return *It == FD1;
9315           });
9316     }
9317 
9318     // Associated with a capture, because the mapping flags depend on it.
9319     // Go through all of the elements with the overlapped elements.
9320     bool IsFirstComponentList = true;
9321     for (const auto &Pair : OverlappedData) {
9322       const MapData &L = *Pair.getFirst();
9323       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9324       OpenMPMapClauseKind MapType;
9325       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9326       bool IsImplicit;
9327       const ValueDecl *Mapper;
9328       const Expr *VarRef;
9329       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9330           L;
9331       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9332           OverlappedComponents = Pair.getSecond();
9333       generateInfoForComponentList(
9334           MapType, MapModifiers, llvm::None, Components, CombinedInfo,
9335           PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
9336           /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
9337       IsFirstComponentList = false;
9338     }
9339     // Go through other elements without overlapped elements.
9340     for (const MapData &L : DeclComponentLists) {
9341       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9342       OpenMPMapClauseKind MapType;
9343       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9344       bool IsImplicit;
9345       const ValueDecl *Mapper;
9346       const Expr *VarRef;
9347       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9348           L;
9349       auto It = OverlappedData.find(&L);
9350       if (It == OverlappedData.end())
9351         generateInfoForComponentList(MapType, MapModifiers, llvm::None,
9352                                      Components, CombinedInfo, PartialStruct,
9353                                      IsFirstComponentList, IsImplicit, Mapper,
9354                                      /*ForDeviceAddr=*/false, VD, VarRef);
9355       IsFirstComponentList = false;
9356     }
9357   }
9358 
9359   /// Generate the default map information for a given capture \a CI,
9360   /// record field declaration \a RI and captured value \a CV.
9361   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
9362                               const FieldDecl &RI, llvm::Value *CV,
9363                               MapCombinedInfoTy &CombinedInfo) const {
9364     bool IsImplicit = true;
9365     // Do the default mapping.
9366     if (CI.capturesThis()) {
9367       CombinedInfo.Exprs.push_back(nullptr);
9368       CombinedInfo.BasePointers.push_back(CV);
9369       CombinedInfo.Pointers.push_back(CV);
9370       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
9371       CombinedInfo.Sizes.push_back(
9372           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
9373                                     CGF.Int64Ty, /*isSigned=*/true));
9374       // Default map type.
9375       CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
9376     } else if (CI.capturesVariableByCopy()) {
9377       const VarDecl *VD = CI.getCapturedVar();
9378       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9379       CombinedInfo.BasePointers.push_back(CV);
9380       CombinedInfo.Pointers.push_back(CV);
9381       if (!RI.getType()->isAnyPointerType()) {
9382         // We have to signal to the runtime captures passed by value that are
9383         // not pointers.
9384         CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
9385         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9386             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
9387       } else {
9388         // Pointers are implicitly mapped with a zero size and no flags
9389         // (other than first map that is added for all implicit maps).
9390         CombinedInfo.Types.push_back(OMP_MAP_NONE);
9391         CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9392       }
9393       auto I = FirstPrivateDecls.find(VD);
9394       if (I != FirstPrivateDecls.end())
9395         IsImplicit = I->getSecond();
9396     } else {
9397       assert(CI.capturesVariable() && "Expected captured reference.");
9398       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9399       QualType ElementType = PtrTy->getPointeeType();
9400       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9401           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9402       // The default map type for a scalar/complex type is 'to' because by
9403       // default the value doesn't have to be retrieved. For an aggregate
9404       // type, the default is 'tofrom'.
9405       CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
9406       const VarDecl *VD = CI.getCapturedVar();
9407       auto I = FirstPrivateDecls.find(VD);
9408       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9409       CombinedInfo.BasePointers.push_back(CV);
9410       if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9411         Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9412             CV, ElementType, CGF.getContext().getDeclAlign(VD),
9413             AlignmentSource::Decl));
9414         CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
9415       } else {
9416         CombinedInfo.Pointers.push_back(CV);
9417       }
9418       if (I != FirstPrivateDecls.end())
9419         IsImplicit = I->getSecond();
9420     }
9421     // Every default map produces a single argument which is a target parameter.
9422     CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
9423 
9424     // Add flag stating this is an implicit map.
9425     if (IsImplicit)
9426       CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
9427 
9428     // No user-defined mapper for default mapping.
9429     CombinedInfo.Mappers.push_back(nullptr);
9430   }
9431 };
9432 } // anonymous namespace
9433 
9434 static void emitNonContiguousDescriptor(
9435     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9436     CGOpenMPRuntime::TargetDataInfo &Info) {
9437   CodeGenModule &CGM = CGF.CGM;
9438   MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
9439       &NonContigInfo = CombinedInfo.NonContigInfo;
9440 
9441   // Build an array of struct descriptor_dim and then assign it to
9442   // offload_args.
9443   //
9444   // struct descriptor_dim {
9445   //  uint64_t offset;
9446   //  uint64_t count;
9447   //  uint64_t stride
9448   // };
9449   ASTContext &C = CGF.getContext();
9450   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9451   RecordDecl *RD;
9452   RD = C.buildImplicitRecord("descriptor_dim");
9453   RD->startDefinition();
9454   addFieldToRecordDecl(C, RD, Int64Ty);
9455   addFieldToRecordDecl(C, RD, Int64Ty);
9456   addFieldToRecordDecl(C, RD, Int64Ty);
9457   RD->completeDefinition();
9458   QualType DimTy = C.getRecordType(RD);
9459 
9460   enum { OffsetFD = 0, CountFD, StrideFD };
9461   // We need two index variable here since the size of "Dims" is the same as the
9462   // size of Components, however, the size of offset, count, and stride is equal
9463   // to the size of base declaration that is non-contiguous.
9464   for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9465     // Skip emitting ir if dimension size is 1 since it cannot be
9466     // non-contiguous.
9467     if (NonContigInfo.Dims[I] == 1)
9468       continue;
9469     llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
9470     QualType ArrayTy =
9471         C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
9472     Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
9473     for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
9474       unsigned RevIdx = EE - II - 1;
9475       LValue DimsLVal = CGF.MakeAddrLValue(
9476           CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
9477       // Offset
9478       LValue OffsetLVal = CGF.EmitLValueForField(
9479           DimsLVal, *std::next(RD->field_begin(), OffsetFD));
9480       CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
9481       // Count
9482       LValue CountLVal = CGF.EmitLValueForField(
9483           DimsLVal, *std::next(RD->field_begin(), CountFD));
9484       CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
9485       // Stride
9486       LValue StrideLVal = CGF.EmitLValueForField(
9487           DimsLVal, *std::next(RD->field_begin(), StrideFD));
9488       CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
9489     }
9490     // args[I] = &dims
9491     Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9492         DimsAddr, CGM.Int8PtrTy, CGM.Int8Ty);
9493     llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9494         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9495         Info.PointersArray, 0, I);
9496     Address PAddr(P, CGM.VoidPtrTy, CGF.getPointerAlign());
9497     CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
9498     ++L;
9499   }
9500 }
9501 
9502 // Try to extract the base declaration from a `this->x` expression if possible.
9503 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9504   if (!E)
9505     return nullptr;
9506 
9507   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
9508     if (const MemberExpr *ME =
9509             dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9510       return ME->getMemberDecl();
9511   return nullptr;
9512 }
9513 
9514 /// Emit a string constant containing the names of the values mapped to the
9515 /// offloading runtime library.
9516 llvm::Constant *
9517 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9518                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9519 
9520   uint32_t SrcLocStrSize;
9521   if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9522     return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
9523 
9524   SourceLocation Loc;
9525   if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9526     if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9527       Loc = VD->getLocation();
9528     else
9529       Loc = MapExprs.getMapExpr()->getExprLoc();
9530   } else {
9531     Loc = MapExprs.getMapDecl()->getLocation();
9532   }
9533 
9534   std::string ExprName;
9535   if (MapExprs.getMapExpr()) {
9536     PrintingPolicy P(CGF.getContext().getLangOpts());
9537     llvm::raw_string_ostream OS(ExprName);
9538     MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9539     OS.flush();
9540   } else {
9541     ExprName = MapExprs.getMapDecl()->getNameAsString();
9542   }
9543 
9544   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9545   return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
9546                                          PLoc.getLine(), PLoc.getColumn(),
9547                                          SrcLocStrSize);
9548 }
9549 
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// \param CGF Function into which the temporaries and stores are emitted.
/// \param CombinedInfo Parallel lists (base pointers, pointers, sizes, map
///        types, mappers, expressions) describing every mapped entry.
/// \param Info Receives the emitted arrays; its array information is cleared
///        first and NumberOfPtrs is set to the number of base pointers.
/// \param OMPBuilder Used to create the map-type and map-name globals.
/// \param IsNonContiguous When true, entries flagged OMP_MAP_NON_CONTIG store
///        their NonContigInfo.Dims value in the sizes array, and the
///        non-contiguous descriptors are emitted at the end if any offsets
///        were collected.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // Base pointers, pointers and mappers each get a void* array temporary
    // with one slot per mapped entry.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    // ConstSizes starts out all-zero; RuntimeSizes marks the entries whose
    // size has to be stored at run time.
    SmallVector<llvm::Constant *> ConstSizes(
        CombinedInfo.Sizes.size(), llvm::ConstantInt::get(CGF.Int64Ty, 0));
    llvm::SmallBitVector RuntimeSizes(CombinedInfo.Sizes.size());
    for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
      if (auto *CI = dyn_cast<llvm::Constant>(CombinedInfo.Sizes[I])) {
        // Constant expressions and global values are not treated as
        // compile-time sizes; they fall through to the runtime path.
        if (!isa<llvm::ConstantExpr>(CI) && !isa<llvm::GlobalValue>(CI)) {
          // Non-contiguous entries record NonContigInfo.Dims[I] in place of
          // the size.
          if (IsNonContiguous && (CombinedInfo.Types[I] &
                                  MappableExprsHandler::OMP_MAP_NON_CONTIG))
            ConstSizes[I] = llvm::ConstantInt::get(
                CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]);
          else
            ConstSizes[I] = CI;
          continue;
        }
      }
      RuntimeSizes.set(I);
    }

    if (RuntimeSizes.all()) {
      // Every size is stored at run time: an uninitialized temporary is
      // enough, it is filled in the per-entry loop below.
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // At least one size is a compile-time constant: emit the constant
      // sizes as a private constant global.
      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage, SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      if (RuntimeSizes.any()) {
        // Mixed case: copy the global into a temporary so that the runtime
        // entries can be overwritten in the per-entry loop below.
        QualType SizeArrayType = Ctx.getConstantArrayType(
            Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
            /*IndexTypeQuals=*/0);
        Address Buffer = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes");
        llvm::Value *GblConstPtr =
            CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                SizesArrayGbl, CGM.Int64Ty->getPointerTo());
        CGF.Builder.CreateMemCpy(
            Buffer,
            Address(GblConstPtr, CGM.Int64Ty,
                    CGM.getNaturalTypeAlignment(Ctx.getIntTypeForBitwidth(
                        /*DestWidth=*/64, /*Signed=*/false))),
            CGF.getTypeSize(SizeArrayType));
        Info.SizesArray = Buffer.getPointer();
      } else {
        // All sizes are constant: use the global directly.
        Info.SizesArray = SizesArrayGbl;
      }
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      // One name/location string per entry of CombinedInfo.Exprs.
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      // Mapping is modified in place; Info.MapTypesArray was already created
      // from the unmodified values above.
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Fill the per-entry slots: base pointer, pointer, runtime size (if any)
    // and mapper function.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      // The slot is cast so the value can be stored with its own type.
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, BPVal->getType(),
                     Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record the slot address for base pointers that carry a device
      // pointer declaration, when Info asks for that information.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, PVal->getType(), Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Only sizes marked as runtime need a store; the others are already
      // part of the constant initializer.
      if (RuntimeSizes.test(I)) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, CGM.Int64Ty, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      // Entries without a user-defined mapper store a null pointer.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  // Descriptors are emitted only when non-contiguous handling was requested,
  // offsets were collected, and at least one entry is mapped.
  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}
9740 
namespace {
/// Optional settings accepted by emitOffloadingArraysArgument.
struct ArgumentsOptions {
  /// Request the map types meant for the end of the region rather than the
  /// ones for its beginning.
  bool ForEndCall{false};

  /// Implicitly convertible from the ForEndCall flag.
  ArgumentsOptions(bool ForEndCall) : ForEndCall{ForEndCall} {}
  /// Leaves every option at its default.
  ArgumentsOptions() = default;
};
} // namespace
9749 
9750 /// Emit the arguments to be passed to the runtime library based on the
9751 /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
9752 /// ForEndCall, emit map types to be passed for the end of the region instead of
9753 /// the beginning.
9754 static void emitOffloadingArraysArgument(
9755     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
9756     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
9757     llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
9758     llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
9759     const ArgumentsOptions &Options = ArgumentsOptions()) {
9760   assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
9761          "expected region end call to runtime only when end call is separate");
9762   CodeGenModule &CGM = CGF.CGM;
9763   if (Info.NumberOfPtrs) {
9764     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9765         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9766         Info.BasePointersArray,
9767         /*Idx0=*/0, /*Idx1=*/0);
9768     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9769         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9770         Info.PointersArray,
9771         /*Idx0=*/0,
9772         /*Idx1=*/0);
9773     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9774         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9775         /*Idx0=*/0, /*Idx1=*/0);
9776     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9777         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9778         Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
9779                                                     : Info.MapTypesArray,
9780         /*Idx0=*/0,
9781         /*Idx1=*/0);
9782 
9783     // Only emit the mapper information arrays if debug information is
9784     // requested.
9785     if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9786       MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9787     else
9788       MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9789           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9790           Info.MapNamesArray,
9791           /*Idx0=*/0,
9792           /*Idx1=*/0);
9793     // If there is no user-defined mapper, set the mapper array to nullptr to
9794     // avoid an unnecessary data privatization
9795     if (!Info.HasMapper)
9796       MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9797     else
9798       MappersArrayArg =
9799           CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
9800   } else {
9801     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9802     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9803     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9804     MapTypesArrayArg =
9805         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9806     MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9807     MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9808   }
9809 }
9810 
9811 /// Check for inner distribute directive.
9812 static const OMPExecutableDirective *
9813 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9814   const auto *CS = D.getInnermostCapturedStmt();
9815   const auto *Body =
9816       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9817   const Stmt *ChildStmt =
9818       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9819 
9820   if (const auto *NestedDir =
9821           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9822     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9823     switch (D.getDirectiveKind()) {
9824     case OMPD_target:
9825       if (isOpenMPDistributeDirective(DKind))
9826         return NestedDir;
9827       if (DKind == OMPD_teams) {
9828         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9829             /*IgnoreCaptured=*/true);
9830         if (!Body)
9831           return nullptr;
9832         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9833         if (const auto *NND =
9834                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9835           DKind = NND->getDirectiveKind();
9836           if (isOpenMPDistributeDirective(DKind))
9837             return NND;
9838         }
9839       }
9840       return nullptr;
9841     case OMPD_target_teams:
9842       if (isOpenMPDistributeDirective(DKind))
9843         return NestedDir;
9844       return nullptr;
9845     case OMPD_target_parallel:
9846     case OMPD_target_simd:
9847     case OMPD_target_parallel_for:
9848     case OMPD_target_parallel_for_simd:
9849       return nullptr;
9850     case OMPD_target_teams_distribute:
9851     case OMPD_target_teams_distribute_simd:
9852     case OMPD_target_teams_distribute_parallel_for:
9853     case OMPD_target_teams_distribute_parallel_for_simd:
9854     case OMPD_parallel:
9855     case OMPD_for:
9856     case OMPD_parallel_for:
9857     case OMPD_parallel_master:
9858     case OMPD_parallel_sections:
9859     case OMPD_for_simd:
9860     case OMPD_parallel_for_simd:
9861     case OMPD_cancel:
9862     case OMPD_cancellation_point:
9863     case OMPD_ordered:
9864     case OMPD_threadprivate:
9865     case OMPD_allocate:
9866     case OMPD_task:
9867     case OMPD_simd:
9868     case OMPD_tile:
9869     case OMPD_unroll:
9870     case OMPD_sections:
9871     case OMPD_section:
9872     case OMPD_single:
9873     case OMPD_master:
9874     case OMPD_critical:
9875     case OMPD_taskyield:
9876     case OMPD_barrier:
9877     case OMPD_taskwait:
9878     case OMPD_taskgroup:
9879     case OMPD_atomic:
9880     case OMPD_flush:
9881     case OMPD_depobj:
9882     case OMPD_scan:
9883     case OMPD_teams:
9884     case OMPD_target_data:
9885     case OMPD_target_exit_data:
9886     case OMPD_target_enter_data:
9887     case OMPD_distribute:
9888     case OMPD_distribute_simd:
9889     case OMPD_distribute_parallel_for:
9890     case OMPD_distribute_parallel_for_simd:
9891     case OMPD_teams_distribute:
9892     case OMPD_teams_distribute_simd:
9893     case OMPD_teams_distribute_parallel_for:
9894     case OMPD_teams_distribute_parallel_for_simd:
9895     case OMPD_target_update:
9896     case OMPD_declare_simd:
9897     case OMPD_declare_variant:
9898     case OMPD_begin_declare_variant:
9899     case OMPD_end_declare_variant:
9900     case OMPD_declare_target:
9901     case OMPD_end_declare_target:
9902     case OMPD_declare_reduction:
9903     case OMPD_declare_mapper:
9904     case OMPD_taskloop:
9905     case OMPD_taskloop_simd:
9906     case OMPD_master_taskloop:
9907     case OMPD_master_taskloop_simd:
9908     case OMPD_parallel_master_taskloop:
9909     case OMPD_parallel_master_taskloop_simd:
9910     case OMPD_requires:
9911     case OMPD_metadirective:
9912     case OMPD_unknown:
9913     default:
9914       llvm_unreachable("Unexpected directive.");
9915     }
9916   }
9917 
9918   return nullptr;
9919 }
9920 
9921 /// Emit the user-defined mapper function. The code generation follows the
9922 /// pattern in the example below.
9923 /// \code
9924 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9925 ///                                           void *base, void *begin,
9926 ///                                           int64_t size, int64_t type,
9927 ///                                           void *name = nullptr) {
9928 ///   // Allocate space for an array section first or add a base/begin for
9929 ///   // pointer dereference.
9930 ///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9931 ///       !maptype.IsDelete)
9932 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9933 ///                                 size*sizeof(Ty), clearToFromMember(type));
9934 ///   // Map members.
9935 ///   for (unsigned i = 0; i < size; i++) {
9936 ///     // For each component specified by this mapper:
9937 ///     for (auto c : begin[i]->all_components) {
9938 ///       if (c.hasMapper())
9939 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9940 ///                       c.arg_type, c.arg_name);
9941 ///       else
9942 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9943 ///                                     c.arg_begin, c.arg_size, c.arg_type,
9944 ///                                     c.arg_name);
9945 ///     }
9946 ///   }
9947 ///   // Delete the array section.
9948 ///   if (size > 1 && maptype.IsDelete)
9949 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9950 ///                                 size*sizeof(Ty), clearToFromMember(type));
9951 /// }
9952 /// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Nothing to do if a mapper function was already generated for this
  // declaration (it is recorded in UDMMap at the end of this function).
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  // Size and in-memory IR type of one element of the mapped type. They are
  // used to convert the incoming byte size into an element count and to step
  // through the array one element at a time.
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);

  // Prepare mapper function arguments and attributes.
  // The parameters are, in order: handle, base, begin, size, type and name.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The function name is composed of "omp_mapper", the mangled name of the
  // mapped type and the declared name of the mapper.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Make sure the generated mapper does not carry the optnone attribute.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  // A separate CodeGenFunction is used, so the state of \p CGF (if any) is not
  // touched while the mapper body is emitted.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  // From here on, Size holds an element count rather than a byte count.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  // One-past-the-end pointer; the element loop below stops when it is reached.
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  // HeadBB is passed as the exit block, so control reaches the loop header
  // when the initialization is skipped.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  // An empty range skips both the loop and the deletion part and goes straight
  // to the function exit block.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Remember the block that branches into the loop body; it supplies the
  // initial value of the element-pointer PHI below.
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  // LastBB tracks the block from which the back edge of the loop originates.
  // It stays BodyBB when there are no components and otherwise becomes the
  // "omp.type.end" block of the last emitted component.
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  // Address of the current element. Its alignment is derived from the
  // alignment of the BeginArg local and the element size.
  Address PtrCurrent(PtrPHI, ElemTy,
                     MapperCGF.GetAddrOfLocalVar(&BeginArg)
                         .getAlignment()
                         .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, PtrCurrent);
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the component count by the flag member offset so that it can be
  // added to the map type of each component below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // A mapping name is only emitted when debug info is enabled; otherwise a
    // null pointer is passed as the name.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    // LeftToFrom keeps only the TO/FROM bits of the incoming map type; the
    // branches below dispatch on its value at run time.
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Merge the four possible results. The tofrom case arrives directly from
    // ToElseBB and keeps the member map type unchanged.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    // The same six arguments are passed either to a nested user-defined mapper
    // or to the runtime push function.
    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  // Close the loop: the back edge comes from the last block emitted for the
  // loop body.
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the generated function for this declaration. If a function context
  // was supplied, also record the declaration against that function.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
10200 
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
10207 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
10208     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
10209     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
10210     llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
10211     bool IsInit) {
10212   StringRef Prefix = IsInit ? ".init" : ".del";
10213 
10214   // Evaluate if this is an array section.
10215   llvm::BasicBlock *BodyBB =
10216       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
10217   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
10218       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
10219   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
10220       MapType,
10221       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
10222   llvm::Value *DeleteCond;
10223   llvm::Value *Cond;
10224   if (IsInit) {
10225     // base != begin?
10226     llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
10227     // IsPtrAndObj?
10228     llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
10229         MapType,
10230         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
10231     PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
10232     BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
10233     Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
10234     DeleteCond = MapperCGF.Builder.CreateIsNull(
10235         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
10236   } else {
10237     Cond = IsArray;
10238     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
10239         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
10240   }
10241   Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
10242   MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
10243 
10244   MapperCGF.EmitBlock(BodyBB);
10245   // Get the array size by multiplying element size and element number (i.e., \p
10246   // Size).
10247   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
10248       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
10249   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
10250   // memory allocation/deletion purpose only.
10251   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
10252       MapType,
10253       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
10254                                    MappableExprsHandler::OMP_MAP_FROM)));
10255   MapTypeArg = MapperCGF.Builder.CreateOr(
10256       MapTypeArg,
10257       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));
10258 
10259   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
10260   // data structure.
10261   llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
10262                                    ArraySize, MapTypeArg, MapName};
10263   MapperCGF.EmitRuntimeCall(
10264       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
10265                                             OMPRTL___tgt_push_mapper_component),
10266       OffloadingArgs);
10267 }
10268 
10269 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10270     const OMPDeclareMapperDecl *D) {
10271   auto I = UDMMap.find(D);
10272   if (I != UDMMap.end())
10273     return I->second;
10274   emitUserDefinedMapper(D);
10275   return UDMMap.lookup(D);
10276 }
10277 
10278 void CGOpenMPRuntime::emitTargetNumIterationsCall(
10279     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10280     llvm::Value *DeviceID,
10281     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10282                                      const OMPLoopDirective &D)>
10283         SizeEmitter) {
10284   OpenMPDirectiveKind Kind = D.getDirectiveKind();
10285   const OMPExecutableDirective *TD = &D;
10286   // Get nested teams distribute kind directive, if any.
10287   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
10288     TD = getNestedDistributeDirective(CGM.getContext(), D);
10289   if (!TD)
10290     return;
10291   const auto *LD = cast<OMPLoopDirective>(TD);
10292   auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
10293                                                          PrePostActionTy &) {
10294     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
10295       llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10296       llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
10297       CGF.EmitRuntimeCall(
10298           OMPBuilder.getOrCreateRuntimeFunction(
10299               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
10300           Args);
10301     }
10302   };
10303   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
10304 }
10305 
10306 void CGOpenMPRuntime::emitTargetCall(
10307     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10308     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
10309     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10310     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10311                                      const OMPLoopDirective &D)>
10312         SizeEmitter) {
10313   if (!CGF.HaveInsertPoint())
10314     return;
10315 
10316   const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsDevice &&
10317                                    CGM.getLangOpts().OpenMPOffloadMandatory;
10318 
10319   assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
10320 
10321   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10322                                  D.hasClausesOfKind<OMPNowaitClause>();
10323   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
10324   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
10325   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
10326                                             PrePostActionTy &) {
10327     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10328   };
10329   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
10330 
10331   CodeGenFunction::OMPTargetDataInfo InputInfo;
10332   llvm::Value *MapTypesArray = nullptr;
10333   llvm::Value *MapNamesArray = nullptr;
10334   // Generate code for the host fallback function.
10335   auto &&FallbackGen = [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask,
10336                         &CS, OffloadingMandatory](CodeGenFunction &CGF) {
10337     if (OffloadingMandatory) {
10338       CGF.Builder.CreateUnreachable();
10339     } else {
10340       if (RequiresOuterTask) {
10341         CapturedVars.clear();
10342         CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10343       }
10344       emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10345     }
10346   };
10347   // Fill up the pointer arrays and transfer execution to the device.
10348   auto &&ThenGen = [this, Device, OutlinedFnID, &D, &InputInfo, &MapTypesArray,
10349                     &MapNamesArray, SizeEmitter,
10350                     FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
10351     if (Device.getInt() == OMPC_DEVICE_ancestor) {
10352       // Reverse offloading is not supported, so just execute on the host.
10353       FallbackGen(CGF);
10354       return;
10355     }
10356 
10357     // On top of the arrays that were filled up, the target offloading call
10358     // takes as arguments the device id as well as the host pointer. The host
10359     // pointer is used by the runtime library to identify the current target
10360     // region, so it only has to be unique and not necessarily point to
10361     // anything. It could be the pointer to the outlined function that
10362     // implements the target region, but we aren't using that so that the
10363     // compiler doesn't need to keep that, and could therefore inline the host
10364     // function if proven worthwhile during optimization.
10365 
10366     // From this point on, we need to have an ID of the target region defined.
10367     assert(OutlinedFnID && "Invalid outlined function ID!");
10368     (void)OutlinedFnID;
10369 
10370     // Emit device ID if any.
10371     llvm::Value *DeviceID;
10372     if (Device.getPointer()) {
10373       assert((Device.getInt() == OMPC_DEVICE_unknown ||
10374               Device.getInt() == OMPC_DEVICE_device_num) &&
10375              "Expected device_num modifier.");
10376       llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10377       DeviceID =
10378           CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10379     } else {
10380       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10381     }
10382 
10383     // Emit the number of elements in the offloading arrays.
10384     llvm::Value *PointerNum =
10385         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10386 
10387     // Return value of the runtime offloading call.
10388     llvm::Value *Return;
10389 
10390     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
10391     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
10392 
10393     // Source location for the ident struct
10394     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10395 
10396     // Emit tripcount for the target loop-based directive.
10397     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
10398 
10399     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10400     // The target region is an outlined function launched by the runtime
10401     // via calls __tgt_target() or __tgt_target_teams().
10402     //
10403     // __tgt_target() launches a target region with one team and one thread,
10404     // executing a serial region.  This master thread may in turn launch
10405     // more threads within its team upon encountering a parallel region,
10406     // however, no additional teams can be launched on the device.
10407     //
10408     // __tgt_target_teams() launches a target region with one or more teams,
10409     // each with one or more threads.  This call is required for target
10410     // constructs such as:
10411     //  'target teams'
10412     //  'target' / 'teams'
10413     //  'target teams distribute parallel for'
10414     //  'target parallel'
10415     // and so on.
10416     //
10417     // Note that on the host and CPU targets, the runtime implementation of
10418     // these calls simply call the outlined function without forking threads.
10419     // The outlined functions themselves have runtime calls to
10420     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
10421     // the compiler in emitTeamsCall() and emitParallelCall().
10422     //
10423     // In contrast, on the NVPTX target, the implementation of
10424     // __tgt_target_teams() launches a GPU kernel with the requested number
10425     // of teams and threads so no additional calls to the runtime are required.
10426     if (NumTeams) {
10427       // If we have NumTeams defined this means that we have an enclosed teams
10428       // region. Therefore we also expect to have NumThreads defined. These two
10429       // values should be defined in the presence of a teams directive,
10430       // regardless of having any clauses associated. If the user is using teams
10431       // but no clauses, these two values will be the default that should be
10432       // passed to the runtime library - a 32-bit integer with the value zero.
10433       assert(NumThreads && "Thread limit expression should be available along "
10434                            "with number of teams.");
10435       SmallVector<llvm::Value *> OffloadingArgs = {
10436           RTLoc,
10437           DeviceID,
10438           OutlinedFnID,
10439           PointerNum,
10440           InputInfo.BasePointersArray.getPointer(),
10441           InputInfo.PointersArray.getPointer(),
10442           InputInfo.SizesArray.getPointer(),
10443           MapTypesArray,
10444           MapNamesArray,
10445           InputInfo.MappersArray.getPointer(),
10446           NumTeams,
10447           NumThreads};
10448       if (HasNowait) {
10449         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10450         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10451         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10452         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10453         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10454         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10455       }
10456       Return = CGF.EmitRuntimeCall(
10457           OMPBuilder.getOrCreateRuntimeFunction(
10458               CGM.getModule(), HasNowait
10459                                    ? OMPRTL___tgt_target_teams_nowait_mapper
10460                                    : OMPRTL___tgt_target_teams_mapper),
10461           OffloadingArgs);
10462     } else {
10463       SmallVector<llvm::Value *> OffloadingArgs = {
10464           RTLoc,
10465           DeviceID,
10466           OutlinedFnID,
10467           PointerNum,
10468           InputInfo.BasePointersArray.getPointer(),
10469           InputInfo.PointersArray.getPointer(),
10470           InputInfo.SizesArray.getPointer(),
10471           MapTypesArray,
10472           MapNamesArray,
10473           InputInfo.MappersArray.getPointer()};
10474       if (HasNowait) {
10475         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10476         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10477         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10478         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10479         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10480         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10481       }
10482       Return = CGF.EmitRuntimeCall(
10483           OMPBuilder.getOrCreateRuntimeFunction(
10484               CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
10485                                          : OMPRTL___tgt_target_mapper),
10486           OffloadingArgs);
10487     }
10488 
10489     // Check the error code and execute the host version if required.
10490     llvm::BasicBlock *OffloadFailedBlock =
10491         CGF.createBasicBlock("omp_offload.failed");
10492     llvm::BasicBlock *OffloadContBlock =
10493         CGF.createBasicBlock("omp_offload.cont");
10494     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
10495     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
10496 
10497     CGF.EmitBlock(OffloadFailedBlock);
10498     FallbackGen(CGF);
10499 
10500     CGF.EmitBranch(OffloadContBlock);
10501 
10502     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
10503   };
10504 
10505   // Notify that the host version must be executed.
10506   auto &&ElseGen = [FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
10507     FallbackGen(CGF);
10508   };
10509 
10510   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10511                           &MapNamesArray, &CapturedVars, RequiresOuterTask,
10512                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
10513     // Fill up the arrays with all the captured variables.
10514     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10515 
10516     // Get mappable expression information.
10517     MappableExprsHandler MEHandler(D, CGF);
10518     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10519     llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10520 
10521     auto RI = CS.getCapturedRecordDecl()->field_begin();
10522     auto *CV = CapturedVars.begin();
10523     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10524                                               CE = CS.capture_end();
10525          CI != CE; ++CI, ++RI, ++CV) {
10526       MappableExprsHandler::MapCombinedInfoTy CurInfo;
10527       MappableExprsHandler::StructRangeInfoTy PartialStruct;
10528 
10529       // VLA sizes are passed to the outlined region by copy and do not have map
10530       // information associated.
10531       if (CI->capturesVariableArrayType()) {
10532         CurInfo.Exprs.push_back(nullptr);
10533         CurInfo.BasePointers.push_back(*CV);
10534         CurInfo.Pointers.push_back(*CV);
10535         CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10536             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10537         // Copy to the device as an argument. No need to retrieve it.
10538         CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
10539                                 MappableExprsHandler::OMP_MAP_TARGET_PARAM |
10540                                 MappableExprsHandler::OMP_MAP_IMPLICIT);
10541         CurInfo.Mappers.push_back(nullptr);
10542       } else {
10543         // If we have any information in the map clause, we use it, otherwise we
10544         // just do a default mapping.
10545         MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
10546         if (!CI->capturesThis())
10547           MappedVarSet.insert(CI->getCapturedVar());
10548         else
10549           MappedVarSet.insert(nullptr);
10550         if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
10551           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10552         // Generate correct mapping for variables captured by reference in
10553         // lambdas.
10554         if (CI->capturesVariable())
10555           MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10556                                                   CurInfo, LambdaPointers);
10557       }
10558       // We expect to have at least an element of information for this capture.
10559       assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
10560              "Non-existing map pointer for capture!");
10561       assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10562              CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10563              CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10564              CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10565              "Inconsistent map information sizes!");
10566 
10567       // If there is an entry in PartialStruct it means we have a struct with
10568       // individual members mapped. Emit an extra combined entry.
10569       if (PartialStruct.Base.isValid()) {
10570         CombinedInfo.append(PartialStruct.PreliminaryMapData);
10571         MEHandler.emitCombinedEntry(
10572             CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
10573             !PartialStruct.PreliminaryMapData.BasePointers.empty());
10574       }
10575 
10576       // We need to append the results of this capture to what we already have.
10577       CombinedInfo.append(CurInfo);
10578     }
10579     // Adjust MEMBER_OF flags for the lambdas captures.
10580     MEHandler.adjustMemberOfForLambdaCaptures(
10581         LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
10582         CombinedInfo.Types);
10583     // Map any list items in a map clause that were not captures because they
10584     // weren't referenced within the construct.
10585     MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
10586 
10587     TargetDataInfo Info;
10588     // Fill up the arrays and create the arguments.
10589     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
10590     emitOffloadingArraysArgument(
10591         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
10592         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
10593         {/*ForEndCall=*/false});
10594 
10595     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10596     InputInfo.BasePointersArray =
10597         Address(Info.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10598     InputInfo.PointersArray =
10599         Address(Info.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10600     InputInfo.SizesArray =
10601         Address(Info.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10602     InputInfo.MappersArray =
10603         Address(Info.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10604     MapTypesArray = Info.MapTypesArray;
10605     MapNamesArray = Info.MapNamesArray;
10606     if (RequiresOuterTask)
10607       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10608     else
10609       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10610   };
10611 
10612   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10613                              CodeGenFunction &CGF, PrePostActionTy &) {
10614     if (RequiresOuterTask) {
10615       CodeGenFunction::OMPTargetDataInfo InputInfo;
10616       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10617     } else {
10618       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10619     }
10620   };
10621 
10622   // If we have a target function ID it means that we need to support
10623   // offloading, otherwise, just execute on the host. We need to execute on host
10624   // regardless of the conditional in the if clause if, e.g., the user do not
10625   // specify target triples.
10626   if (OutlinedFnID) {
10627     if (IfCond) {
10628       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10629     } else {
10630       RegionCodeGenTy ThenRCG(TargetThenGen);
10631       ThenRCG(CGF);
10632     }
10633   } else {
10634     RegionCodeGenTy ElseRCG(TargetElseGen);
10635     ElseRCG(CGF);
10636   }
10637 }
10638 
10639 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10640                                                     StringRef ParentName) {
10641   if (!S)
10642     return;
10643 
10644   // Codegen OMP target directives that offload compute to the device.
10645   bool RequiresDeviceCodegen =
10646       isa<OMPExecutableDirective>(S) &&
10647       isOpenMPTargetExecutionDirective(
10648           cast<OMPExecutableDirective>(S)->getDirectiveKind());
10649 
10650   if (RequiresDeviceCodegen) {
10651     const auto &E = *cast<OMPExecutableDirective>(S);
10652     unsigned DeviceID;
10653     unsigned FileID;
10654     unsigned Line;
10655     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
10656                              FileID, Line);
10657 
10658     // Is this a target region that should not be emitted as an entry point? If
10659     // so just signal we are done with this target region.
10660     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
10661                                                             ParentName, Line))
10662       return;
10663 
10664     switch (E.getDirectiveKind()) {
10665     case OMPD_target:
10666       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
10667                                                    cast<OMPTargetDirective>(E));
10668       break;
10669     case OMPD_target_parallel:
10670       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
10671           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
10672       break;
10673     case OMPD_target_teams:
10674       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
10675           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
10676       break;
10677     case OMPD_target_teams_distribute:
10678       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
10679           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
10680       break;
10681     case OMPD_target_teams_distribute_simd:
10682       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
10683           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
10684       break;
10685     case OMPD_target_parallel_for:
10686       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
10687           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
10688       break;
10689     case OMPD_target_parallel_for_simd:
10690       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
10691           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
10692       break;
10693     case OMPD_target_simd:
10694       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
10695           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
10696       break;
10697     case OMPD_target_teams_distribute_parallel_for:
10698       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
10699           CGM, ParentName,
10700           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
10701       break;
10702     case OMPD_target_teams_distribute_parallel_for_simd:
10703       CodeGenFunction::
10704           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
10705               CGM, ParentName,
10706               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
10707       break;
10708     case OMPD_parallel:
10709     case OMPD_for:
10710     case OMPD_parallel_for:
10711     case OMPD_parallel_master:
10712     case OMPD_parallel_sections:
10713     case OMPD_for_simd:
10714     case OMPD_parallel_for_simd:
10715     case OMPD_cancel:
10716     case OMPD_cancellation_point:
10717     case OMPD_ordered:
10718     case OMPD_threadprivate:
10719     case OMPD_allocate:
10720     case OMPD_task:
10721     case OMPD_simd:
10722     case OMPD_tile:
10723     case OMPD_unroll:
10724     case OMPD_sections:
10725     case OMPD_section:
10726     case OMPD_single:
10727     case OMPD_master:
10728     case OMPD_critical:
10729     case OMPD_taskyield:
10730     case OMPD_barrier:
10731     case OMPD_taskwait:
10732     case OMPD_taskgroup:
10733     case OMPD_atomic:
10734     case OMPD_flush:
10735     case OMPD_depobj:
10736     case OMPD_scan:
10737     case OMPD_teams:
10738     case OMPD_target_data:
10739     case OMPD_target_exit_data:
10740     case OMPD_target_enter_data:
10741     case OMPD_distribute:
10742     case OMPD_distribute_simd:
10743     case OMPD_distribute_parallel_for:
10744     case OMPD_distribute_parallel_for_simd:
10745     case OMPD_teams_distribute:
10746     case OMPD_teams_distribute_simd:
10747     case OMPD_teams_distribute_parallel_for:
10748     case OMPD_teams_distribute_parallel_for_simd:
10749     case OMPD_target_update:
10750     case OMPD_declare_simd:
10751     case OMPD_declare_variant:
10752     case OMPD_begin_declare_variant:
10753     case OMPD_end_declare_variant:
10754     case OMPD_declare_target:
10755     case OMPD_end_declare_target:
10756     case OMPD_declare_reduction:
10757     case OMPD_declare_mapper:
10758     case OMPD_taskloop:
10759     case OMPD_taskloop_simd:
10760     case OMPD_master_taskloop:
10761     case OMPD_master_taskloop_simd:
10762     case OMPD_parallel_master_taskloop:
10763     case OMPD_parallel_master_taskloop_simd:
10764     case OMPD_requires:
10765     case OMPD_metadirective:
10766     case OMPD_unknown:
10767     default:
10768       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10769     }
10770     return;
10771   }
10772 
10773   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10774     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10775       return;
10776 
10777     scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10778     return;
10779   }
10780 
10781   // If this is a lambda function, look into its body.
10782   if (const auto *L = dyn_cast<LambdaExpr>(S))
10783     S = L->getBody();
10784 
10785   // Keep looking for target regions recursively.
10786   for (const Stmt *II : S->children())
10787     scanForTargetRegionsFunctions(II, ParentName);
10788 }
10789 
10790 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10791   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10792       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10793   if (!DevTy)
10794     return false;
10795   // Do not emit device_type(nohost) functions for the host.
10796   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10797     return true;
10798   // Do not emit device_type(host) functions for the device.
10799   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10800     return true;
10801   return false;
10802 }
10803 
10804 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10805   // If emitting code for the host, we do not process FD here. Instead we do
10806   // the normal code generation.
10807   if (!CGM.getLangOpts().OpenMPIsDevice) {
10808     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10809       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10810                                   CGM.getLangOpts().OpenMPIsDevice))
10811         return true;
10812     return false;
10813   }
10814 
10815   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10816   // Try to detect target regions in the function.
10817   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10818     StringRef Name = CGM.getMangledName(GD);
10819     scanForTargetRegionsFunctions(FD->getBody(), Name);
10820     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10821                                 CGM.getLangOpts().OpenMPIsDevice))
10822       return true;
10823   }
10824 
10825   // Do not to emit function if it is not marked as declare target.
10826   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10827          AlreadyEmittedTargetDecls.count(VD) == 0;
10828 }
10829 
10830 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10831   if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10832                               CGM.getLangOpts().OpenMPIsDevice))
10833     return true;
10834 
10835   if (!CGM.getLangOpts().OpenMPIsDevice)
10836     return false;
10837 
10838   // Check if there are Ctors/Dtors in this declaration and look for target
10839   // regions in it. We use the complete variant to produce the kernel name
10840   // mangling.
10841   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10842   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10843     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10844       StringRef ParentName =
10845           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10846       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10847     }
10848     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10849       StringRef ParentName =
10850           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10851       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10852     }
10853   }
10854 
10855   // Do not to emit variable if it is not marked as declare target.
10856   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10857       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10858           cast<VarDecl>(GD.getDecl()));
10859   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10860       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10861        HasRequiresUnifiedSharedMemory)) {
10862     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10863     return true;
10864   }
10865   return false;
10866 }
10867 
10868 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10869                                                    llvm::Constant *Addr) {
10870   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10871       !CGM.getLangOpts().OpenMPIsDevice)
10872     return;
10873 
10874   // If we have host/nohost variables, they do not need to be registered.
10875   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10876       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10877   if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
10878     return;
10879 
10880   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10881       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10882   if (!Res) {
10883     if (CGM.getLangOpts().OpenMPIsDevice) {
10884       // Register non-target variables being emitted in device code (debug info
10885       // may cause this).
10886       StringRef VarName = CGM.getMangledName(VD);
10887       EmittedNonTargetVariables.try_emplace(VarName, Addr);
10888     }
10889     return;
10890   }
10891   // Register declare target variables.
10892   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10893   StringRef VarName;
10894   CharUnits VarSize;
10895   llvm::GlobalValue::LinkageTypes Linkage;
10896 
10897   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10898       !HasRequiresUnifiedSharedMemory) {
10899     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10900     VarName = CGM.getMangledName(VD);
10901     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10902       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10903       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10904     } else {
10905       VarSize = CharUnits::Zero();
10906     }
10907     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10908     // Temp solution to prevent optimizations of the internal variables.
10909     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10910       // Do not create a "ref-variable" if the original is not also available
10911       // on the host.
10912       if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
10913         return;
10914       std::string RefName = getName({VarName, "ref"});
10915       if (!CGM.GetGlobalValue(RefName)) {
10916         llvm::Constant *AddrRef =
10917             getOrCreateInternalVariable(Addr->getType(), RefName);
10918         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10919         GVAddrRef->setConstant(/*Val=*/true);
10920         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10921         GVAddrRef->setInitializer(Addr);
10922         CGM.addCompilerUsedGlobal(GVAddrRef);
10923       }
10924     }
10925   } else {
10926     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10927             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10928              HasRequiresUnifiedSharedMemory)) &&
10929            "Declare target attribute must link or to with unified memory.");
10930     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10931       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10932     else
10933       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10934 
10935     if (CGM.getLangOpts().OpenMPIsDevice) {
10936       VarName = Addr->getName();
10937       Addr = nullptr;
10938     } else {
10939       VarName = getAddrOfDeclareTargetVar(VD).getName();
10940       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10941     }
10942     VarSize = CGM.getPointerSize();
10943     Linkage = llvm::GlobalValue::WeakAnyLinkage;
10944   }
10945 
10946   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10947       VarName, Addr, VarSize, Flags, Linkage);
10948 }
10949 
10950 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10951   if (isa<FunctionDecl>(GD.getDecl()) ||
10952       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10953     return emitTargetFunctions(GD);
10954 
10955   return emitTargetGlobalVariable(GD);
10956 }
10957 
10958 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10959   for (const VarDecl *VD : DeferredGlobalVariables) {
10960     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10961         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10962     if (!Res)
10963       continue;
10964     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10965         !HasRequiresUnifiedSharedMemory) {
10966       CGM.EmitGlobal(VD);
10967     } else {
10968       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10969               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10970                HasRequiresUnifiedSharedMemory)) &&
10971              "Expected link clause or to clause with unified memory.");
10972       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10973     }
10974   }
10975 }
10976 
10977 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10978     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10979   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10980          " Expected target-based directive.");
10981 }
10982 
10983 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10984   for (const OMPClause *Clause : D->clauselists()) {
10985     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10986       HasRequiresUnifiedSharedMemory = true;
10987     } else if (const auto *AC =
10988                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10989       switch (AC->getAtomicDefaultMemOrderKind()) {
10990       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10991         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10992         break;
10993       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10994         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10995         break;
10996       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10997         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10998         break;
10999       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
11000         break;
11001       }
11002     }
11003   }
11004 }
11005 
/// Returns the current value of RequiresAtomicOrdering. In this file that
/// member is updated by processRequiresDirective() when it encounters an
/// atomic_default_mem_order clause.
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
11009 
11010 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
11011                                                        LangAS &AS) {
11012   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
11013     return false;
11014   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
11015   switch(A->getAllocatorType()) {
11016   case OMPAllocateDeclAttr::OMPNullMemAlloc:
11017   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
11018   // Not supported, fallback to the default mem space.
11019   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
11020   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
11021   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
11022   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
11023   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
11024   case OMPAllocateDeclAttr::OMPConstMemAlloc:
11025   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
11026     AS = LangAS::Default;
11027     return true;
11028   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
11029     llvm_unreachable("Expected predefined allocator for the variables with the "
11030                      "static storage.");
11031   }
11032   return false;
11033 }
11034 
/// Returns the HasRequiresUnifiedSharedMemory flag. In this file the flag is
/// set by processRequiresDirective() when a 'requires' declaration carries a
/// unified_shared_memory clause.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
11038 
11039 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
11040     CodeGenModule &CGM)
11041     : CGM(CGM) {
11042   if (CGM.getLangOpts().OpenMPIsDevice) {
11043     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
11044     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
11045   }
11046 }
11047 
11048 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
11049   if (CGM.getLangOpts().OpenMPIsDevice)
11050     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
11051 }
11052 
11053 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
11054   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
11055     return true;
11056 
11057   const auto *D = cast<FunctionDecl>(GD.getDecl());
11058   // Do not to emit function if it is marked as declare target as it was already
11059   // emitted.
11060   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
11061     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
11062       if (auto *F = dyn_cast_or_null<llvm::Function>(
11063               CGM.GetGlobalValue(CGM.getMangledName(GD))))
11064         return !F->isDeclaration();
11065       return false;
11066     }
11067     return true;
11068   }
11069 
11070   return !AlreadyEmittedTargetDecls.insert(D).second;
11071 }
11072 
11073 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
11074   // If we don't have entries or if we are emitting code for the device, we
11075   // don't need to do anything.
11076   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
11077       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
11078       (OffloadEntriesInfoManager.empty() &&
11079        !HasEmittedDeclareTargetRegion &&
11080        !HasEmittedTargetRegion))
11081     return nullptr;
11082 
11083   // Create and register the function that handles the requires directives.
11084   ASTContext &C = CGM.getContext();
11085 
11086   llvm::Function *RequiresRegFn;
11087   {
11088     CodeGenFunction CGF(CGM);
11089     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
11090     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
11091     std::string ReqName = getName({"omp_offloading", "requires_reg"});
11092     RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
11093     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
11094     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
11095     // TODO: check for other requires clauses.
11096     // The requires directive takes effect only when a target region is
11097     // present in the compilation unit. Otherwise it is ignored and not
11098     // passed to the runtime. This avoids the runtime from throwing an error
11099     // for mismatching requires clauses across compilation units that don't
11100     // contain at least 1 target region.
11101     assert((HasEmittedTargetRegion ||
11102             HasEmittedDeclareTargetRegion ||
11103             !OffloadEntriesInfoManager.empty()) &&
11104            "Target or declare target region expected.");
11105     if (HasRequiresUnifiedSharedMemory)
11106       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
11107     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11108                             CGM.getModule(), OMPRTL___tgt_register_requires),
11109                         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
11110     CGF.FinishFunction();
11111   }
11112   return RequiresRegFn;
11113 }
11114 
11115 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
11116                                     const OMPExecutableDirective &D,
11117                                     SourceLocation Loc,
11118                                     llvm::Function *OutlinedFn,
11119                                     ArrayRef<llvm::Value *> CapturedVars) {
11120   if (!CGF.HaveInsertPoint())
11121     return;
11122 
11123   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11124   CodeGenFunction::RunCleanupsScope Scope(CGF);
11125 
11126   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11127   llvm::Value *Args[] = {
11128       RTLoc,
11129       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11130       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
11131   llvm::SmallVector<llvm::Value *, 16> RealArgs;
11132   RealArgs.append(std::begin(Args), std::end(Args));
11133   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11134 
11135   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11136       CGM.getModule(), OMPRTL___kmpc_fork_teams);
11137   CGF.EmitRuntimeCall(RTLFn, RealArgs);
11138 }
11139 
11140 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11141                                          const Expr *NumTeams,
11142                                          const Expr *ThreadLimit,
11143                                          SourceLocation Loc) {
11144   if (!CGF.HaveInsertPoint())
11145     return;
11146 
11147   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11148 
11149   llvm::Value *NumTeamsVal =
11150       NumTeams
11151           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
11152                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11153           : CGF.Builder.getInt32(0);
11154 
11155   llvm::Value *ThreadLimitVal =
11156       ThreadLimit
11157           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11158                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11159           : CGF.Builder.getInt32(0);
11160 
11161   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
11162   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
11163                                      ThreadLimitVal};
11164   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11165                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
11166                       PushNumTeamsArgs);
11167 }
11168 
11169 void CGOpenMPRuntime::emitTargetDataCalls(
11170     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11171     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
11172   if (!CGF.HaveInsertPoint())
11173     return;
11174 
11175   // Action used to replace the default codegen action and turn privatization
11176   // off.
11177   PrePostActionTy NoPrivAction;
11178 
11179   // Generate the code for the opening of the data environment. Capture all the
11180   // arguments of the runtime call by reference because they are used in the
11181   // closing of the region.
11182   auto &&BeginThenGen = [this, &D, Device, &Info,
11183                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
11184     // Fill up the arrays with all the mapped variables.
11185     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11186 
11187     // Get map clause information.
11188     MappableExprsHandler MEHandler(D, CGF);
11189     MEHandler.generateAllInfo(CombinedInfo);
11190 
11191     // Fill up the arrays and create the arguments.
11192     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11193                          /*IsNonContiguous=*/true);
11194 
11195     llvm::Value *BasePointersArrayArg = nullptr;
11196     llvm::Value *PointersArrayArg = nullptr;
11197     llvm::Value *SizesArrayArg = nullptr;
11198     llvm::Value *MapTypesArrayArg = nullptr;
11199     llvm::Value *MapNamesArrayArg = nullptr;
11200     llvm::Value *MappersArrayArg = nullptr;
11201     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11202                                  SizesArrayArg, MapTypesArrayArg,
11203                                  MapNamesArrayArg, MappersArrayArg, Info);
11204 
11205     // Emit device ID if any.
11206     llvm::Value *DeviceID = nullptr;
11207     if (Device) {
11208       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11209                                            CGF.Int64Ty, /*isSigned=*/true);
11210     } else {
11211       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11212     }
11213 
11214     // Emit the number of elements in the offloading arrays.
11215     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11216     //
11217     // Source location for the ident struct
11218     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11219 
11220     llvm::Value *OffloadingArgs[] = {RTLoc,
11221                                      DeviceID,
11222                                      PointerNum,
11223                                      BasePointersArrayArg,
11224                                      PointersArrayArg,
11225                                      SizesArrayArg,
11226                                      MapTypesArrayArg,
11227                                      MapNamesArrayArg,
11228                                      MappersArrayArg};
11229     CGF.EmitRuntimeCall(
11230         OMPBuilder.getOrCreateRuntimeFunction(
11231             CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
11232         OffloadingArgs);
11233 
11234     // If device pointer privatization is required, emit the body of the region
11235     // here. It will have to be duplicated: with and without privatization.
11236     if (!Info.CaptureDeviceAddrMap.empty())
11237       CodeGen(CGF);
11238   };
11239 
11240   // Generate code for the closing of the data region.
11241   auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
11242                                                 PrePostActionTy &) {
11243     assert(Info.isValid() && "Invalid data environment closing arguments.");
11244 
11245     llvm::Value *BasePointersArrayArg = nullptr;
11246     llvm::Value *PointersArrayArg = nullptr;
11247     llvm::Value *SizesArrayArg = nullptr;
11248     llvm::Value *MapTypesArrayArg = nullptr;
11249     llvm::Value *MapNamesArrayArg = nullptr;
11250     llvm::Value *MappersArrayArg = nullptr;
11251     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11252                                  SizesArrayArg, MapTypesArrayArg,
11253                                  MapNamesArrayArg, MappersArrayArg, Info,
11254                                  {/*ForEndCall=*/true});
11255 
11256     // Emit device ID if any.
11257     llvm::Value *DeviceID = nullptr;
11258     if (Device) {
11259       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11260                                            CGF.Int64Ty, /*isSigned=*/true);
11261     } else {
11262       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11263     }
11264 
11265     // Emit the number of elements in the offloading arrays.
11266     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11267 
11268     // Source location for the ident struct
11269     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11270 
11271     llvm::Value *OffloadingArgs[] = {RTLoc,
11272                                      DeviceID,
11273                                      PointerNum,
11274                                      BasePointersArrayArg,
11275                                      PointersArrayArg,
11276                                      SizesArrayArg,
11277                                      MapTypesArrayArg,
11278                                      MapNamesArrayArg,
11279                                      MappersArrayArg};
11280     CGF.EmitRuntimeCall(
11281         OMPBuilder.getOrCreateRuntimeFunction(
11282             CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
11283         OffloadingArgs);
11284   };
11285 
11286   // If we need device pointer privatization, we need to emit the body of the
11287   // region with no privatization in the 'else' branch of the conditional.
11288   // Otherwise, we don't have to do anything.
11289   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
11290                                                          PrePostActionTy &) {
11291     if (!Info.CaptureDeviceAddrMap.empty()) {
11292       CodeGen.setAction(NoPrivAction);
11293       CodeGen(CGF);
11294     }
11295   };
11296 
11297   // We don't have to do anything to close the region if the if clause evaluates
11298   // to false.
11299   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
11300 
11301   if (IfCond) {
11302     emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
11303   } else {
11304     RegionCodeGenTy RCG(BeginThenGen);
11305     RCG(CGF);
11306   }
11307 
11308   // If we don't require privatization of device pointers, we emit the body in
11309   // between the runtime calls. This avoids duplicating the body code.
11310   if (Info.CaptureDeviceAddrMap.empty()) {
11311     CodeGen.setAction(NoPrivAction);
11312     CodeGen(CGF);
11313   }
11314 
11315   if (IfCond) {
11316     emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
11317   } else {
11318     RegionCodeGenTy RCG(EndThenGen);
11319     RCG(CGF);
11320   }
11321 }
11322 
11323 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
11324     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11325     const Expr *Device) {
11326   if (!CGF.HaveInsertPoint())
11327     return;
11328 
11329   assert((isa<OMPTargetEnterDataDirective>(D) ||
11330           isa<OMPTargetExitDataDirective>(D) ||
11331           isa<OMPTargetUpdateDirective>(D)) &&
11332          "Expecting either target enter, exit data, or update directives.");
11333 
11334   CodeGenFunction::OMPTargetDataInfo InputInfo;
11335   llvm::Value *MapTypesArray = nullptr;
11336   llvm::Value *MapNamesArray = nullptr;
11337   // Generate the code for the opening of the data environment.
11338   auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
11339                     &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
11340     // Emit device ID if any.
11341     llvm::Value *DeviceID = nullptr;
11342     if (Device) {
11343       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11344                                            CGF.Int64Ty, /*isSigned=*/true);
11345     } else {
11346       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11347     }
11348 
11349     // Emit the number of elements in the offloading arrays.
11350     llvm::Constant *PointerNum =
11351         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
11352 
11353     // Source location for the ident struct
11354     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11355 
11356     llvm::Value *OffloadingArgs[] = {RTLoc,
11357                                      DeviceID,
11358                                      PointerNum,
11359                                      InputInfo.BasePointersArray.getPointer(),
11360                                      InputInfo.PointersArray.getPointer(),
11361                                      InputInfo.SizesArray.getPointer(),
11362                                      MapTypesArray,
11363                                      MapNamesArray,
11364                                      InputInfo.MappersArray.getPointer()};
11365 
11366     // Select the right runtime function call for each standalone
11367     // directive.
11368     const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
11369     RuntimeFunction RTLFn;
11370     switch (D.getDirectiveKind()) {
11371     case OMPD_target_enter_data:
11372       RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
11373                         : OMPRTL___tgt_target_data_begin_mapper;
11374       break;
11375     case OMPD_target_exit_data:
11376       RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
11377                         : OMPRTL___tgt_target_data_end_mapper;
11378       break;
11379     case OMPD_target_update:
11380       RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
11381                         : OMPRTL___tgt_target_data_update_mapper;
11382       break;
11383     case OMPD_parallel:
11384     case OMPD_for:
11385     case OMPD_parallel_for:
11386     case OMPD_parallel_master:
11387     case OMPD_parallel_sections:
11388     case OMPD_for_simd:
11389     case OMPD_parallel_for_simd:
11390     case OMPD_cancel:
11391     case OMPD_cancellation_point:
11392     case OMPD_ordered:
11393     case OMPD_threadprivate:
11394     case OMPD_allocate:
11395     case OMPD_task:
11396     case OMPD_simd:
11397     case OMPD_tile:
11398     case OMPD_unroll:
11399     case OMPD_sections:
11400     case OMPD_section:
11401     case OMPD_single:
11402     case OMPD_master:
11403     case OMPD_critical:
11404     case OMPD_taskyield:
11405     case OMPD_barrier:
11406     case OMPD_taskwait:
11407     case OMPD_taskgroup:
11408     case OMPD_atomic:
11409     case OMPD_flush:
11410     case OMPD_depobj:
11411     case OMPD_scan:
11412     case OMPD_teams:
11413     case OMPD_target_data:
11414     case OMPD_distribute:
11415     case OMPD_distribute_simd:
11416     case OMPD_distribute_parallel_for:
11417     case OMPD_distribute_parallel_for_simd:
11418     case OMPD_teams_distribute:
11419     case OMPD_teams_distribute_simd:
11420     case OMPD_teams_distribute_parallel_for:
11421     case OMPD_teams_distribute_parallel_for_simd:
11422     case OMPD_declare_simd:
11423     case OMPD_declare_variant:
11424     case OMPD_begin_declare_variant:
11425     case OMPD_end_declare_variant:
11426     case OMPD_declare_target:
11427     case OMPD_end_declare_target:
11428     case OMPD_declare_reduction:
11429     case OMPD_declare_mapper:
11430     case OMPD_taskloop:
11431     case OMPD_taskloop_simd:
11432     case OMPD_master_taskloop:
11433     case OMPD_master_taskloop_simd:
11434     case OMPD_parallel_master_taskloop:
11435     case OMPD_parallel_master_taskloop_simd:
11436     case OMPD_target:
11437     case OMPD_target_simd:
11438     case OMPD_target_teams_distribute:
11439     case OMPD_target_teams_distribute_simd:
11440     case OMPD_target_teams_distribute_parallel_for:
11441     case OMPD_target_teams_distribute_parallel_for_simd:
11442     case OMPD_target_teams:
11443     case OMPD_target_parallel:
11444     case OMPD_target_parallel_for:
11445     case OMPD_target_parallel_for_simd:
11446     case OMPD_requires:
11447     case OMPD_metadirective:
11448     case OMPD_unknown:
11449     default:
11450       llvm_unreachable("Unexpected standalone target data directive.");
11451       break;
11452     }
11453     CGF.EmitRuntimeCall(
11454         OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
11455         OffloadingArgs);
11456   };
11457 
11458   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
11459                           &MapNamesArray](CodeGenFunction &CGF,
11460                                           PrePostActionTy &) {
11461     // Fill up the arrays with all the mapped variables.
11462     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11463 
11464     // Get map clause information.
11465     MappableExprsHandler MEHandler(D, CGF);
11466     MEHandler.generateAllInfo(CombinedInfo);
11467 
11468     TargetDataInfo Info;
11469     // Fill up the arrays and create the arguments.
11470     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11471                          /*IsNonContiguous=*/true);
11472     bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
11473                              D.hasClausesOfKind<OMPNowaitClause>();
11474     emitOffloadingArraysArgument(
11475         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
11476         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
11477         {/*ForEndCall=*/false});
11478     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
11479     InputInfo.BasePointersArray =
11480         Address(Info.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
11481     InputInfo.PointersArray =
11482         Address(Info.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
11483     InputInfo.SizesArray =
11484         Address(Info.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
11485     InputInfo.MappersArray =
11486         Address(Info.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
11487     MapTypesArray = Info.MapTypesArray;
11488     MapNamesArray = Info.MapNamesArray;
11489     if (RequiresOuterTask)
11490       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
11491     else
11492       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
11493   };
11494 
11495   if (IfCond) {
11496     emitIfClause(CGF, IfCond, TargetThenGen,
11497                  [](CodeGenFunction &CGF, PrePostActionTy &) {});
11498   } else {
11499     RegionCodeGenTy ThenRCG(TargetThenGen);
11500     ThenRCG(CGF);
11501   }
11502 }
11503 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  ///
  ///  - LinearWithVarStride: 'linear' parameter whose step is itself another
  ///    parameter of the function.
  ///  - Linear: 'linear' parameter with a compile-time constant step.
  ///  - Uniform: parameter named in a 'uniform' clause.
  ///  - Vector: any parameter not classified by a clause.
  ///
  /// Note that LinearWithVarStride is the enumerator with value 0, so a
  /// value-initialized ParamKindTy (e.g. `ParamKindTy{}`) is NOT Vector.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Classification of the parameter; parameters start out as Vector.
    ParamKindTy Kind = Vector;
    /// For Linear: the constant step. For LinearWithVarStride: the position of
    /// the parameter that holds the step. Not meaningful for other kinds.
    llvm::APSInt StrideOrArg;
    /// Alignment from an 'aligned' clause; the name manglers only emit it when
    /// it is non-zero.
    llvm::APSInt Alignment;
  };
} // namespace
11514 
11515 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11516                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11517   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11518   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11519   // of that clause. The VLEN value must be power of 2.
11520   // In other case the notion of the function`s "characteristic data type" (CDT)
11521   // is used to compute the vector length.
11522   // CDT is defined in the following order:
11523   //   a) For non-void function, the CDT is the return type.
11524   //   b) If the function has any non-uniform, non-linear parameters, then the
11525   //   CDT is the type of the first such parameter.
11526   //   c) If the CDT determined by a) or b) above is struct, union, or class
11527   //   type which is pass-by-value (except for the type that maps to the
11528   //   built-in complex data type), the characteristic data type is int.
11529   //   d) If none of the above three cases is applicable, the CDT is int.
11530   // The VLEN is then determined based on the CDT and the size of vector
11531   // register of that ISA for which current vector version is generated. The
11532   // VLEN is computed using the formula below:
11533   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11534   // where vector register size specified in section 3.2.1 Registers and the
11535   // Stack Frame of original AMD64 ABI document.
11536   QualType RetType = FD->getReturnType();
11537   if (RetType.isNull())
11538     return 0;
11539   ASTContext &C = FD->getASTContext();
11540   QualType CDT;
11541   if (!RetType.isNull() && !RetType->isVoidType()) {
11542     CDT = RetType;
11543   } else {
11544     unsigned Offset = 0;
11545     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11546       if (ParamAttrs[Offset].Kind == Vector)
11547         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11548       ++Offset;
11549     }
11550     if (CDT.isNull()) {
11551       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11552         if (ParamAttrs[I + Offset].Kind == Vector) {
11553           CDT = FD->getParamDecl(I)->getType();
11554           break;
11555         }
11556       }
11557     }
11558   }
11559   if (CDT.isNull())
11560     CDT = C.IntTy;
11561   CDT = CDT->getCanonicalTypeUnqualified();
11562   if (CDT->isRecordType() || CDT->isUnionType())
11563     CDT = C.IntTy;
11564   return C.getTypeSize(CDT);
11565 }
11566 
11567 static void
11568 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11569                            const llvm::APSInt &VLENVal,
11570                            ArrayRef<ParamAttrTy> ParamAttrs,
11571                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11572   struct ISADataTy {
11573     char ISA;
11574     unsigned VecRegSize;
11575   };
11576   ISADataTy ISAData[] = {
11577       {
11578           'b', 128
11579       }, // SSE
11580       {
11581           'c', 256
11582       }, // AVX
11583       {
11584           'd', 256
11585       }, // AVX2
11586       {
11587           'e', 512
11588       }, // AVX512
11589   };
11590   llvm::SmallVector<char, 2> Masked;
11591   switch (State) {
11592   case OMPDeclareSimdDeclAttr::BS_Undefined:
11593     Masked.push_back('N');
11594     Masked.push_back('M');
11595     break;
11596   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11597     Masked.push_back('N');
11598     break;
11599   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11600     Masked.push_back('M');
11601     break;
11602   }
11603   for (char Mask : Masked) {
11604     for (const ISADataTy &Data : ISAData) {
11605       SmallString<256> Buffer;
11606       llvm::raw_svector_ostream Out(Buffer);
11607       Out << "_ZGV" << Data.ISA << Mask;
11608       if (!VLENVal) {
11609         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11610         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11611         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11612       } else {
11613         Out << VLENVal;
11614       }
11615       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11616         switch (ParamAttr.Kind){
11617         case LinearWithVarStride:
11618           Out << 's' << ParamAttr.StrideOrArg;
11619           break;
11620         case Linear:
11621           Out << 'l';
11622           if (ParamAttr.StrideOrArg != 1)
11623             Out << ParamAttr.StrideOrArg;
11624           break;
11625         case Uniform:
11626           Out << 'u';
11627           break;
11628         case Vector:
11629           Out << 'v';
11630           break;
11631         }
11632         if (!!ParamAttr.Alignment)
11633           Out << 'a' << ParamAttr.Alignment;
11634       }
11635       Out << '_' << Fn->getName();
11636       Fn->addFnAttr(Out.str());
11637     }
11638   }
11639 }
11640 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined
// in the "Vector Function ABI specifications for AArch64", available
// at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11646 
11647 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11648 ///
11649 /// TODO: Need to implement the behavior for reference marked with a
11650 /// var or no linear modifiers (1.b in the section). For this, we
11651 /// need to extend ParamKindTy to support the linear modifiers.
11652 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11653   QT = QT.getCanonicalType();
11654 
11655   if (QT->isVoidType())
11656     return false;
11657 
11658   if (Kind == ParamKindTy::Uniform)
11659     return false;
11660 
11661   if (Kind == ParamKindTy::Linear)
11662     return false;
11663 
11664   // TODO: Handle linear references with modifiers
11665 
11666   if (Kind == ParamKindTy::LinearWithVarStride)
11667     return false;
11668 
11669   return true;
11670 }
11671 
11672 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11673 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11674   QT = QT.getCanonicalType();
11675   unsigned Size = C.getTypeSize(QT);
11676 
11677   // Only scalars and complex within 16 bytes wide set PVB to true.
11678   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11679     return false;
11680 
11681   if (QT->isFloatingType())
11682     return true;
11683 
11684   if (QT->isIntegerType())
11685     return true;
11686 
11687   if (QT->isPointerType())
11688     return true;
11689 
11690   // TODO: Add support for complex types (section 3.1.2, item 2).
11691 
11692   return false;
11693 }
11694 
11695 /// Computes the lane size (LS) of a return type or of an input parameter,
11696 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11697 /// TODO: Add support for references, section 3.2.1, item 1.
11698 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11699   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11700     QualType PTy = QT.getCanonicalType()->getPointeeType();
11701     if (getAArch64PBV(PTy, C))
11702       return C.getTypeSize(PTy);
11703   }
11704   if (getAArch64PBV(QT, C))
11705     return C.getTypeSize(QT);
11706 
11707   return C.getTypeSize(C.getUIntPtrType());
11708 }
11709 
11710 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11711 // signature of the scalar function, as defined in 3.2.2 of the
11712 // AAVFABI.
11713 static std::tuple<unsigned, unsigned, bool>
11714 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11715   QualType RetType = FD->getReturnType().getCanonicalType();
11716 
11717   ASTContext &C = FD->getASTContext();
11718 
11719   bool OutputBecomesInput = false;
11720 
11721   llvm::SmallVector<unsigned, 8> Sizes;
11722   if (!RetType->isVoidType()) {
11723     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11724     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11725       OutputBecomesInput = true;
11726   }
11727   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11728     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11729     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11730   }
11731 
11732   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11733   // The LS of a function parameter / return value can only be a power
11734   // of 2, starting from 8 bits, up to 128.
11735   assert(llvm::all_of(Sizes,
11736                       [](unsigned Size) {
11737                         return Size == 8 || Size == 16 || Size == 32 ||
11738                                Size == 64 || Size == 128;
11739                       }) &&
11740          "Invalid size");
11741 
11742   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11743                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11744                          OutputBecomesInput);
11745 }
11746 
11747 /// Mangle the parameter part of the vector function name according to
11748 /// their OpenMP classification. The mangling function is defined in
11749 /// section 3.5 of the AAVFABI.
11750 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11751   SmallString<256> Buffer;
11752   llvm::raw_svector_ostream Out(Buffer);
11753   for (const auto &ParamAttr : ParamAttrs) {
11754     switch (ParamAttr.Kind) {
11755     case LinearWithVarStride:
11756       Out << "ls" << ParamAttr.StrideOrArg;
11757       break;
11758     case Linear:
11759       Out << 'l';
11760       // Don't print the step value if it is not present or if it is
11761       // equal to 1.
11762       if (ParamAttr.StrideOrArg != 1)
11763         Out << ParamAttr.StrideOrArg;
11764       break;
11765     case Uniform:
11766       Out << 'u';
11767       break;
11768     case Vector:
11769       Out << 'v';
11770       break;
11771     }
11772 
11773     if (!!ParamAttr.Alignment)
11774       Out << 'a' << ParamAttr.Alignment;
11775   }
11776 
11777   return std::string(Out.str());
11778 }
11779 
11780 // Function used to add the attribute. The parameter `VLEN` is
11781 // templated to allow the use of "x" when targeting scalable functions
11782 // for SVE.
11783 template <typename T>
11784 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11785                                  char ISA, StringRef ParSeq,
11786                                  StringRef MangledName, bool OutputBecomesInput,
11787                                  llvm::Function *Fn) {
11788   SmallString<256> Buffer;
11789   llvm::raw_svector_ostream Out(Buffer);
11790   Out << Prefix << ISA << LMask << VLEN;
11791   if (OutputBecomesInput)
11792     Out << "v";
11793   Out << ParSeq << "_" << MangledName;
11794   Fn->addFnAttr(Out.str());
11795 }
11796 
11797 // Helper function to generate the Advanced SIMD names depending on
11798 // the value of the NDS when simdlen is not present.
11799 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11800                                       StringRef Prefix, char ISA,
11801                                       StringRef ParSeq, StringRef MangledName,
11802                                       bool OutputBecomesInput,
11803                                       llvm::Function *Fn) {
11804   switch (NDS) {
11805   case 8:
11806     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11807                          OutputBecomesInput, Fn);
11808     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11809                          OutputBecomesInput, Fn);
11810     break;
11811   case 16:
11812     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11813                          OutputBecomesInput, Fn);
11814     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11815                          OutputBecomesInput, Fn);
11816     break;
11817   case 32:
11818     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11819                          OutputBecomesInput, Fn);
11820     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11821                          OutputBecomesInput, Fn);
11822     break;
11823   case 64:
11824   case 128:
11825     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11826                          OutputBecomesInput, Fn);
11827     break;
11828   default:
11829     llvm_unreachable("Scalar type is too wide.");
11830   }
11831 }
11832 
11833 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11834 static void emitAArch64DeclareSimdFunction(
11835     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11836     ArrayRef<ParamAttrTy> ParamAttrs,
11837     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11838     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11839 
11840   // Get basic data for building the vector signature.
11841   const auto Data = getNDSWDS(FD, ParamAttrs);
11842   const unsigned NDS = std::get<0>(Data);
11843   const unsigned WDS = std::get<1>(Data);
11844   const bool OutputBecomesInput = std::get<2>(Data);
11845 
11846   // Check the values provided via `simdlen` by the user.
11847   // 1. A `simdlen(1)` doesn't produce vector signatures,
11848   if (UserVLEN == 1) {
11849     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11850         DiagnosticsEngine::Warning,
11851         "The clause simdlen(1) has no effect when targeting aarch64.");
11852     CGM.getDiags().Report(SLoc, DiagID);
11853     return;
11854   }
11855 
11856   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11857   // Advanced SIMD output.
11858   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11859     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11860         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11861                                     "power of 2 when targeting Advanced SIMD.");
11862     CGM.getDiags().Report(SLoc, DiagID);
11863     return;
11864   }
11865 
11866   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
11867   // limits.
11868   if (ISA == 's' && UserVLEN != 0) {
11869     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11870       unsigned DiagID = CGM.getDiags().getCustomDiagID(
11871           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11872                                       "lanes in the architectural constraints "
11873                                       "for SVE (min is 128-bit, max is "
11874                                       "2048-bit, by steps of 128-bit)");
11875       CGM.getDiags().Report(SLoc, DiagID) << WDS;
11876       return;
11877     }
11878   }
11879 
11880   // Sort out parameter sequence.
11881   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11882   StringRef Prefix = "_ZGV";
11883   // Generate simdlen from user input (if any).
11884   if (UserVLEN) {
11885     if (ISA == 's') {
11886       // SVE generates only a masked function.
11887       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11888                            OutputBecomesInput, Fn);
11889     } else {
11890       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11891       // Advanced SIMD generates one or two functions, depending on
11892       // the `[not]inbranch` clause.
11893       switch (State) {
11894       case OMPDeclareSimdDeclAttr::BS_Undefined:
11895         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11896                              OutputBecomesInput, Fn);
11897         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11898                              OutputBecomesInput, Fn);
11899         break;
11900       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11901         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11902                              OutputBecomesInput, Fn);
11903         break;
11904       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11905         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11906                              OutputBecomesInput, Fn);
11907         break;
11908       }
11909     }
11910   } else {
11911     // If no user simdlen is provided, follow the AAVFABI rules for
11912     // generating the vector length.
11913     if (ISA == 's') {
11914       // SVE, section 3.4.1, item 1.
11915       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11916                            OutputBecomesInput, Fn);
11917     } else {
11918       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11919       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11920       // two vector names depending on the use of the clause
11921       // `[not]inbranch`.
11922       switch (State) {
11923       case OMPDeclareSimdDeclAttr::BS_Undefined:
11924         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11925                                   OutputBecomesInput, Fn);
11926         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11927                                   OutputBecomesInput, Fn);
11928         break;
11929       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11930         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11931                                   OutputBecomesInput, Fn);
11932         break;
11933       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11934         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11935                                   OutputBecomesInput, Fn);
11936         break;
11937       }
11938     }
11939   }
11940 }
11941 
11942 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11943                                               llvm::Function *Fn) {
11944   ASTContext &C = CGM.getContext();
11945   FD = FD->getMostRecentDecl();
11946   while (FD) {
11947     // Map params to their positions in function decl.
11948     llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11949     if (isa<CXXMethodDecl>(FD))
11950       ParamPositions.try_emplace(FD, 0);
11951     unsigned ParamPos = ParamPositions.size();
11952     for (const ParmVarDecl *P : FD->parameters()) {
11953       ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11954       ++ParamPos;
11955     }
11956     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11957       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11958       // Mark uniform parameters.
11959       for (const Expr *E : Attr->uniforms()) {
11960         E = E->IgnoreParenImpCasts();
11961         unsigned Pos;
11962         if (isa<CXXThisExpr>(E)) {
11963           Pos = ParamPositions[FD];
11964         } else {
11965           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11966                                 ->getCanonicalDecl();
11967           auto It = ParamPositions.find(PVD);
11968           assert(It != ParamPositions.end() && "Function parameter not found");
11969           Pos = It->second;
11970         }
11971         ParamAttrs[Pos].Kind = Uniform;
11972       }
11973       // Get alignment info.
11974       auto *NI = Attr->alignments_begin();
11975       for (const Expr *E : Attr->aligneds()) {
11976         E = E->IgnoreParenImpCasts();
11977         unsigned Pos;
11978         QualType ParmTy;
11979         if (isa<CXXThisExpr>(E)) {
11980           Pos = ParamPositions[FD];
11981           ParmTy = E->getType();
11982         } else {
11983           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11984                                 ->getCanonicalDecl();
11985           auto It = ParamPositions.find(PVD);
11986           assert(It != ParamPositions.end() && "Function parameter not found");
11987           Pos = It->second;
11988           ParmTy = PVD->getType();
11989         }
11990         ParamAttrs[Pos].Alignment =
11991             (*NI)
11992                 ? (*NI)->EvaluateKnownConstInt(C)
11993                 : llvm::APSInt::getUnsigned(
11994                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11995                           .getQuantity());
11996         ++NI;
11997       }
11998       // Mark linear parameters.
11999       auto *SI = Attr->steps_begin();
12000       for (const Expr *E : Attr->linears()) {
12001         E = E->IgnoreParenImpCasts();
12002         unsigned Pos;
12003         // Rescaling factor needed to compute the linear parameter
12004         // value in the mangled name.
12005         unsigned PtrRescalingFactor = 1;
12006         if (isa<CXXThisExpr>(E)) {
12007           Pos = ParamPositions[FD];
12008         } else {
12009           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12010                                 ->getCanonicalDecl();
12011           auto It = ParamPositions.find(PVD);
12012           assert(It != ParamPositions.end() && "Function parameter not found");
12013           Pos = It->second;
12014           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
12015             PtrRescalingFactor = CGM.getContext()
12016                                      .getTypeSizeInChars(P->getPointeeType())
12017                                      .getQuantity();
12018         }
12019         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
12020         ParamAttr.Kind = Linear;
12021         // Assuming a stride of 1, for `linear` without modifiers.
12022         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
12023         if (*SI) {
12024           Expr::EvalResult Result;
12025           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
12026             if (const auto *DRE =
12027                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
12028               if (const auto *StridePVD =
12029                       dyn_cast<ParmVarDecl>(DRE->getDecl())) {
12030                 ParamAttr.Kind = LinearWithVarStride;
12031                 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
12032                 assert(It != ParamPositions.end() &&
12033                        "Function parameter not found");
12034                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
12035               }
12036             }
12037           } else {
12038             ParamAttr.StrideOrArg = Result.Val.getInt();
12039           }
12040         }
12041         // If we are using a linear clause on a pointer, we need to
12042         // rescale the value of linear_step with the byte size of the
12043         // pointee type.
12044         if (Linear == ParamAttr.Kind)
12045           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
12046         ++SI;
12047       }
12048       llvm::APSInt VLENVal;
12049       SourceLocation ExprLoc;
12050       const Expr *VLENExpr = Attr->getSimdlen();
12051       if (VLENExpr) {
12052         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
12053         ExprLoc = VLENExpr->getExprLoc();
12054       }
12055       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
12056       if (CGM.getTriple().isX86()) {
12057         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
12058       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
12059         unsigned VLEN = VLENVal.getExtValue();
12060         StringRef MangledName = Fn->getName();
12061         if (CGM.getTarget().hasFeature("sve"))
12062           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12063                                          MangledName, 's', 128, Fn, ExprLoc);
12064         if (CGM.getTarget().hasFeature("neon"))
12065           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12066                                          MangledName, 'n', 128, Fn, ExprLoc);
12067       }
12068     }
12069     FD = FD->getPreviousDecl();
12070   }
12071 }
12072 
12073 namespace {
12074 /// Cleanup action for doacross support.
12075 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
12076 public:
12077   static const int DoacrossFinArgs = 2;
12078 
12079 private:
12080   llvm::FunctionCallee RTLFn;
12081   llvm::Value *Args[DoacrossFinArgs];
12082 
12083 public:
12084   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
12085                     ArrayRef<llvm::Value *> CallArgs)
12086       : RTLFn(RTLFn) {
12087     assert(CallArgs.size() == DoacrossFinArgs);
12088     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
12089   }
12090   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12091     if (!CGF.HaveInsertPoint())
12092       return;
12093     CGF.EmitRuntimeCall(RTLFn, Args);
12094   }
12095 };
12096 } // namespace
12097 
12098 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12099                                        const OMPLoopDirective &D,
12100                                        ArrayRef<Expr *> NumIterations) {
12101   if (!CGF.HaveInsertPoint())
12102     return;
12103 
12104   ASTContext &C = CGM.getContext();
12105   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
12106   RecordDecl *RD;
12107   if (KmpDimTy.isNull()) {
12108     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
12109     //  kmp_int64 lo; // lower
12110     //  kmp_int64 up; // upper
12111     //  kmp_int64 st; // stride
12112     // };
12113     RD = C.buildImplicitRecord("kmp_dim");
12114     RD->startDefinition();
12115     addFieldToRecordDecl(C, RD, Int64Ty);
12116     addFieldToRecordDecl(C, RD, Int64Ty);
12117     addFieldToRecordDecl(C, RD, Int64Ty);
12118     RD->completeDefinition();
12119     KmpDimTy = C.getRecordType(RD);
12120   } else {
12121     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
12122   }
12123   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
12124   QualType ArrayTy =
12125       C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
12126 
12127   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
12128   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
12129   enum { LowerFD = 0, UpperFD, StrideFD };
12130   // Fill dims with data.
12131   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
12132     LValue DimsLVal = CGF.MakeAddrLValue(
12133         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
12134     // dims.upper = num_iterations;
12135     LValue UpperLVal = CGF.EmitLValueForField(
12136         DimsLVal, *std::next(RD->field_begin(), UpperFD));
12137     llvm::Value *NumIterVal = CGF.EmitScalarConversion(
12138         CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
12139         Int64Ty, NumIterations[I]->getExprLoc());
12140     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
12141     // dims.stride = 1;
12142     LValue StrideLVal = CGF.EmitLValueForField(
12143         DimsLVal, *std::next(RD->field_begin(), StrideFD));
12144     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
12145                           StrideLVal);
12146   }
12147 
12148   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
12149   // kmp_int32 num_dims, struct kmp_dim * dims);
12150   llvm::Value *Args[] = {
12151       emitUpdateLocation(CGF, D.getBeginLoc()),
12152       getThreadID(CGF, D.getBeginLoc()),
12153       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
12154       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12155           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
12156           CGM.VoidPtrTy)};
12157 
12158   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12159       CGM.getModule(), OMPRTL___kmpc_doacross_init);
12160   CGF.EmitRuntimeCall(RTLFn, Args);
12161   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
12162       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
12163   llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12164       CGM.getModule(), OMPRTL___kmpc_doacross_fini);
12165   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
12166                                              llvm::makeArrayRef(FiniArgs));
12167 }
12168 
12169 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12170                                           const OMPDependClause *C) {
12171   QualType Int64Ty =
12172       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
12173   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
12174   QualType ArrayTy = CGM.getContext().getConstantArrayType(
12175       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
12176   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
12177   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
12178     const Expr *CounterVal = C->getLoopData(I);
12179     assert(CounterVal);
12180     llvm::Value *CntVal = CGF.EmitScalarConversion(
12181         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
12182         CounterVal->getExprLoc());
12183     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
12184                           /*Volatile=*/false, Int64Ty);
12185   }
12186   llvm::Value *Args[] = {
12187       emitUpdateLocation(CGF, C->getBeginLoc()),
12188       getThreadID(CGF, C->getBeginLoc()),
12189       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
12190   llvm::FunctionCallee RTLFn;
12191   if (C->getDependencyKind() == OMPC_DEPEND_source) {
12192     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12193                                                   OMPRTL___kmpc_doacross_post);
12194   } else {
12195     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
12196     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12197                                                   OMPRTL___kmpc_doacross_wait);
12198   }
12199   CGF.EmitRuntimeCall(RTLFn, Args);
12200 }
12201 
12202 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12203                                llvm::FunctionCallee Callee,
12204                                ArrayRef<llvm::Value *> Args) const {
12205   assert(Loc.isValid() && "Outlined function call location must be valid.");
12206   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
12207 
12208   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12209     if (Fn->doesNotThrow()) {
12210       CGF.EmitNounwindRuntimeCall(Fn, Args);
12211       return;
12212     }
12213   }
12214   CGF.EmitRuntimeCall(Callee, Args);
12215 }
12216 
/// Emits a call to the outlined function \p OutlinedFn with arguments
/// \p Args at location \p Loc. This implementation simply forwards to
/// emitCall().
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
12222 
12223 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12224   if (const auto *FD = dyn_cast<FunctionDecl>(D))
12225     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12226       HasEmittedDeclareTargetRegion = true;
12227 }
12228 
/// Returns the address of the parameter \p NativeParam as a local variable
/// of \p CGF. \p TargetParam is not used by this implementation.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
12234 
12235 /// Return allocator value from expression, or return a null allocator (default
12236 /// when no allocator specified).
12237 static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
12238                                     const Expr *Allocator) {
12239   llvm::Value *AllocVal;
12240   if (Allocator) {
12241     AllocVal = CGF.EmitScalarExpr(Allocator);
12242     // According to the standard, the original allocator type is a enum
12243     // (integer). Convert to pointer type, if required.
12244     AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
12245                                         CGF.getContext().VoidPtrTy,
12246                                         Allocator->getExprLoc());
12247   } else {
12248     // If no allocator specified, it defaults to the null allocator.
12249     AllocVal = llvm::Constant::getNullValue(
12250         CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
12251   }
12252   return AllocVal;
12253 }
12254 
/// Returns the address to use for the local variable \p VD.
///
/// - For a null \p VD an invalid address is returned.
/// - If the canonical declaration carries an OMPAllocateDeclAttr and
///   isAllocatableDecl(VD) holds, storage is obtained through __kmpc_alloc
///   (or __kmpc_aligned_alloc when the attribute has an alignment), a cleanup
///   calling __kmpc_free is pushed, and the resulting address is returned.
/// - Otherwise the address recorded for \p VD in the untied-task data of the
///   current function is returned; it is invalid when there is no such record.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  // Look up the pair of addresses recorded for VD in the untied-task data
  // associated with the function currently being emitted, if any.
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    // Compute the allocation size, rounded up to a multiple of the
    // declaration alignment.
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // The size is only known at run time, so the rounding is emitted as IR.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      // Constant size: round up at compile time.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    // Alignment requested on the attribute, cast to size_t; null when the
    // attribute has no alignment expression.
    llvm::Value *Alignment =
        AA->getAlignment()
            ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(AA->getAlignment()),
                                        CGM.SizeTy, /*isSigned=*/false)
            : nullptr;
    // Argument list: thread id, [alignment,] size, allocator. The alignment
    // is only passed when the aligned allocation entry point is used.
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    // Cast the returned pointer to a pointer to the variable's type.
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // If an untied-task slot was recorded for the variable, store the
    // allocated pointer into it.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      // Runtime function called by the cleanup.
      llvm::FunctionCallee RTLFn;
      // Raw encoding of the source location used to obtain the thread id.
      SourceLocation::UIntTy LocEncoding;
      // Address passed to the runtime function.
      Address Addr;
      // Allocator expression; may be null (null allocator).
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      // Emits RTLFn(thread id, address as void *, allocator); does nothing
      // when there is no insertion point.
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    // Prefer the second recorded untied-task address when it is valid;
    // otherwise use the freshly allocated storage.
    Address VDAddr =
        UntiedRealAddr.isValid()
            ? UntiedRealAddr
            : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
    // Free the storage on both the normal and the exceptional path.
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    // For variables with untied-task data, emit the untied switch of the
    // enclosing OpenMP region, if there is one.
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  // No allocate attribute: fall back to the recorded untied-task address,
  // which is invalid when nothing was recorded.
  return UntiedAddr;
}
12358 
12359 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12360                                              const VarDecl *VD) const {
12361   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12362   if (It == FunctionToUntiedTaskStackMap.end())
12363     return false;
12364   return UntiedLocalVarsStack[It->second].count(VD) > 0;
12365 }
12366 
12367 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12368     CodeGenModule &CGM, const OMPLoopDirective &S)
12369     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12370   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12371   if (!NeedToPush)
12372     return;
12373   NontemporalDeclsSet &DS =
12374       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12375   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12376     for (const Stmt *Ref : C->private_refs()) {
12377       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12378       const ValueDecl *VD;
12379       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12380         VD = DRE->getDecl();
12381       } else {
12382         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12383         assert((ME->isImplicitCXXThis() ||
12384                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12385                "Expected member of current class.");
12386         VD = ME->getMemberDecl();
12387       }
12388       DS.insert(VD);
12389     }
12390   }
12391 }
12392 
12393 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12394   if (!NeedToPush)
12395     return;
12396   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12397 }
12398 
12399 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12400     CodeGenFunction &CGF,
12401     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12402                           std::pair<Address, Address>> &LocalVars)
12403     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12404   if (!NeedToPush)
12405     return;
12406   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12407       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12408   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12409 }
12410 
12411 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12412   if (!NeedToPush)
12413     return;
12414   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12415 }
12416 
12417 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12418   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12419 
12420   return llvm::any_of(
12421       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12422       [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
12423 }
12424 
12425 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12426     const OMPExecutableDirective &S,
12427     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12428     const {
12429   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12430   // Vars in target/task regions must be excluded completely.
12431   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12432       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12433     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12434     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12435     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12436     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12437       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12438         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12439     }
12440   }
12441   // Exclude vars in private clauses.
12442   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12443     for (const Expr *Ref : C->varlists()) {
12444       if (!Ref->getType()->isScalarType())
12445         continue;
12446       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12447       if (!DRE)
12448         continue;
12449       NeedToCheckForLPCs.insert(DRE->getDecl());
12450     }
12451   }
12452   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12453     for (const Expr *Ref : C->varlists()) {
12454       if (!Ref->getType()->isScalarType())
12455         continue;
12456       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12457       if (!DRE)
12458         continue;
12459       NeedToCheckForLPCs.insert(DRE->getDecl());
12460     }
12461   }
12462   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12463     for (const Expr *Ref : C->varlists()) {
12464       if (!Ref->getType()->isScalarType())
12465         continue;
12466       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12467       if (!DRE)
12468         continue;
12469       NeedToCheckForLPCs.insert(DRE->getDecl());
12470     }
12471   }
12472   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12473     for (const Expr *Ref : C->varlists()) {
12474       if (!Ref->getType()->isScalarType())
12475         continue;
12476       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12477       if (!DRE)
12478         continue;
12479       NeedToCheckForLPCs.insert(DRE->getDecl());
12480     }
12481   }
12482   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12483     for (const Expr *Ref : C->varlists()) {
12484       if (!Ref->getType()->isScalarType())
12485         continue;
12486       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12487       if (!DRE)
12488         continue;
12489       NeedToCheckForLPCs.insert(DRE->getDecl());
12490     }
12491   }
12492   for (const Decl *VD : NeedToCheckForLPCs) {
12493     for (const LastprivateConditionalData &Data :
12494          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12495       if (Data.DeclToUniqueName.count(VD) > 0) {
12496         if (!Data.Disabled)
12497           NeedToAddForLPCsAsDisabled.insert(VD);
12498         break;
12499       }
12500     }
12501   }
12502 }
12503 
12504 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12505     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12506     : CGM(CGF.CGM),
12507       Action((CGM.getLangOpts().OpenMP >= 50 &&
12508               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12509                            [](const OMPLastprivateClause *C) {
12510                              return C->getKind() ==
12511                                     OMPC_LASTPRIVATE_conditional;
12512                            }))
12513                  ? ActionToDo::PushAsLastprivateConditional
12514                  : ActionToDo::DoNotPush) {
12515   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12516   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12517     return;
12518   assert(Action == ActionToDo::PushAsLastprivateConditional &&
12519          "Expected a push action.");
12520   LastprivateConditionalData &Data =
12521       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12522   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12523     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12524       continue;
12525 
12526     for (const Expr *Ref : C->varlists()) {
12527       Data.DeclToUniqueName.insert(std::make_pair(
12528           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12529           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12530     }
12531   }
12532   Data.IVLVal = IVLVal;
12533   Data.Fn = CGF.CurFn;
12534 }
12535 
12536 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12537     CodeGenFunction &CGF, const OMPExecutableDirective &S)
12538     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12539   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12540   if (CGM.getLangOpts().OpenMP < 50)
12541     return;
12542   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12543   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12544   if (!NeedToAddForLPCsAsDisabled.empty()) {
12545     Action = ActionToDo::DisableLastprivateConditional;
12546     LastprivateConditionalData &Data =
12547         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12548     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12549       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12550     Data.Fn = CGF.CurFn;
12551     Data.Disabled = true;
12552   }
12553 }
12554 
/// Creates the RAII object for directive \p S using the two-argument
/// (CodeGenFunction, directive) constructor, i.e. the form that may push a
/// disabled entry rather than a new lastprivate conditional entry.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12560 
12561 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12562   if (CGM.getLangOpts().OpenMP < 50)
12563     return;
12564   if (Action == ActionToDo::DisableLastprivateConditional) {
12565     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12566            "Expected list of disabled private vars.");
12567     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12568   }
12569   if (Action == ActionToDo::PushAsLastprivateConditional) {
12570     assert(
12571         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12572         "Expected list of lastprivate conditional vars.");
12573     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12574   }
12575 }
12576 
12577 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12578                                                         const VarDecl *VD) {
12579   ASTContext &C = CGM.getContext();
12580   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
12581   if (I == LastprivateConditionalToTypes.end())
12582     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12583   QualType NewType;
12584   const FieldDecl *VDField;
12585   const FieldDecl *FiredField;
12586   LValue BaseLVal;
12587   auto VI = I->getSecond().find(VD);
12588   if (VI == I->getSecond().end()) {
12589     RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
12590     RD->startDefinition();
12591     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12592     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12593     RD->completeDefinition();
12594     NewType = C.getRecordType(RD);
12595     Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12596     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12597     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12598   } else {
12599     NewType = std::get<0>(VI->getSecond());
12600     VDField = std::get<1>(VI->getSecond());
12601     FiredField = std::get<2>(VI->getSecond());
12602     BaseLVal = std::get<3>(VI->getSecond());
12603   }
12604   LValue FiredLVal =
12605       CGF.EmitLValueForField(BaseLVal, FiredField);
12606   CGF.EmitStoreOfScalar(
12607       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12608       FiredLVal);
12609   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
12610 }
12611 
12612 namespace {
12613 /// Checks if the lastprivate conditional variable is referenced in LHS.
12614 class LastprivateConditionalRefChecker final
12615     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12616   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12617   const Expr *FoundE = nullptr;
12618   const Decl *FoundD = nullptr;
12619   StringRef UniqueDeclName;
12620   LValue IVLVal;
12621   llvm::Function *FoundFn = nullptr;
12622   SourceLocation Loc;
12623 
12624 public:
12625   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12626     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12627          llvm::reverse(LPM)) {
12628       auto It = D.DeclToUniqueName.find(E->getDecl());
12629       if (It == D.DeclToUniqueName.end())
12630         continue;
12631       if (D.Disabled)
12632         return false;
12633       FoundE = E;
12634       FoundD = E->getDecl()->getCanonicalDecl();
12635       UniqueDeclName = It->second;
12636       IVLVal = D.IVLVal;
12637       FoundFn = D.Fn;
12638       break;
12639     }
12640     return FoundE == E;
12641   }
12642   bool VisitMemberExpr(const MemberExpr *E) {
12643     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12644       return false;
12645     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12646          llvm::reverse(LPM)) {
12647       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12648       if (It == D.DeclToUniqueName.end())
12649         continue;
12650       if (D.Disabled)
12651         return false;
12652       FoundE = E;
12653       FoundD = E->getMemberDecl()->getCanonicalDecl();
12654       UniqueDeclName = It->second;
12655       IVLVal = D.IVLVal;
12656       FoundFn = D.Fn;
12657       break;
12658     }
12659     return FoundE == E;
12660   }
12661   bool VisitStmt(const Stmt *S) {
12662     for (const Stmt *Child : S->children()) {
12663       if (!Child)
12664         continue;
12665       if (const auto *E = dyn_cast<Expr>(Child))
12666         if (!E->isGLValue())
12667           continue;
12668       if (Visit(Child))
12669         return true;
12670     }
12671     return false;
12672   }
12673   explicit LastprivateConditionalRefChecker(
12674       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12675       : LPM(LPM) {}
12676   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12677   getFoundData() const {
12678     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12679   }
12680 };
12681 } // namespace
12682 
12683 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
12684                                                        LValue IVLVal,
12685                                                        StringRef UniqueDeclName,
12686                                                        LValue LVal,
12687                                                        SourceLocation Loc) {
12688   // Last updated loop counter for the lastprivate conditional var.
12689   // int<xx> last_iv = 0;
12690   llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
12691   llvm::Constant *LastIV =
12692       getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
12693   cast<llvm::GlobalVariable>(LastIV)->setAlignment(
12694       IVLVal.getAlignment().getAsAlign());
12695   LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
12696 
12697   // Last value of the lastprivate conditional.
12698   // decltype(priv_a) last_a;
12699   llvm::GlobalVariable *Last = getOrCreateInternalVariable(
12700       CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
12701   Last->setAlignment(LVal.getAlignment().getAsAlign());
12702   LValue LastLVal = CGF.MakeAddrLValue(
12703       Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());
12704 
12705   // Global loop counter. Required to handle inner parallel-for regions.
12706   // iv
12707   llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
12708 
12709   // #pragma omp critical(a)
12710   // if (last_iv <= iv) {
12711   //   last_iv = iv;
12712   //   last_a = priv_a;
12713   // }
12714   auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
12715                     Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
12716     Action.Enter(CGF);
12717     llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
12718     // (last_iv <= iv) ? Check if the variable is updated and store new
12719     // value in global var.
12720     llvm::Value *CmpRes;
12721     if (IVLVal.getType()->isSignedIntegerType()) {
12722       CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
12723     } else {
12724       assert(IVLVal.getType()->isUnsignedIntegerType() &&
12725              "Loop iteration variable must be integer.");
12726       CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
12727     }
12728     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
12729     llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
12730     CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
12731     // {
12732     CGF.EmitBlock(ThenBB);
12733 
12734     //   last_iv = iv;
12735     CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
12736 
12737     //   last_a = priv_a;
12738     switch (CGF.getEvaluationKind(LVal.getType())) {
12739     case TEK_Scalar: {
12740       llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
12741       CGF.EmitStoreOfScalar(PrivVal, LastLVal);
12742       break;
12743     }
12744     case TEK_Complex: {
12745       CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
12746       CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
12747       break;
12748     }
12749     case TEK_Aggregate:
12750       llvm_unreachable(
12751           "Aggregates are not supported in lastprivate conditional.");
12752     }
12753     // }
12754     CGF.EmitBranch(ExitBB);
12755     // There is no need to emit line number for unconditional branch.
12756     (void)ApplyDebugLocation::CreateEmpty(CGF);
12757     CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
12758   };
12759 
12760   if (CGM.getLangOpts().OpenMPSimd) {
12761     // Do not emit as a critical region as no parallel region could be emitted.
12762     RegionCodeGenTy ThenRCG(CodeGen);
12763     ThenRCG(CGF);
12764   } else {
12765     emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
12766   }
12767 }
12768 
12769 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12770                                                          const Expr *LHS) {
12771   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12772     return;
12773   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12774   if (!Checker.Visit(LHS))
12775     return;
12776   const Expr *FoundE;
12777   const Decl *FoundD;
12778   StringRef UniqueDeclName;
12779   LValue IVLVal;
12780   llvm::Function *FoundFn;
12781   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12782       Checker.getFoundData();
12783   if (FoundFn != CGF.CurFn) {
12784     // Special codegen for inner parallel regions.
12785     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12786     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12787     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12788            "Lastprivate conditional is not found in outer region.");
12789     QualType StructTy = std::get<0>(It->getSecond());
12790     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12791     LValue PrivLVal = CGF.EmitLValue(FoundE);
12792     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12793         PrivLVal.getAddress(CGF),
12794         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
12795         CGF.ConvertTypeForMem(StructTy));
12796     LValue BaseLVal =
12797         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12798     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12799     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12800                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12801                         FiredLVal, llvm::AtomicOrdering::Unordered,
12802                         /*IsVolatile=*/true, /*isInit=*/false);
12803     return;
12804   }
12805 
12806   // Private address of the lastprivate conditional in the current context.
12807   // priv_a
12808   LValue LVal = CGF.EmitLValue(FoundE);
12809   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12810                                    FoundE->getExprLoc());
12811 }
12812 
12813 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12814     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12815     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12816   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12817     return;
12818   auto Range = llvm::reverse(LastprivateConditionalStack);
12819   auto It = llvm::find_if(
12820       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12821   if (It == Range.end() || It->Fn != CGF.CurFn)
12822     return;
12823   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12824   assert(LPCI != LastprivateConditionalToTypes.end() &&
12825          "Lastprivates must be registered already.");
12826   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12827   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12828   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12829   for (const auto &Pair : It->DeclToUniqueName) {
12830     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12831     if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
12832       continue;
12833     auto I = LPCI->getSecond().find(Pair.first);
12834     assert(I != LPCI->getSecond().end() &&
12835            "Lastprivate must be rehistered already.");
12836     // bool Cmp = priv_a.Fired != 0;
12837     LValue BaseLVal = std::get<3>(I->getSecond());
12838     LValue FiredLVal =
12839         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12840     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12841     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12842     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12843     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12844     // if (Cmp) {
12845     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12846     CGF.EmitBlock(ThenBB);
12847     Address Addr = CGF.GetAddrOfLocalVar(VD);
12848     LValue LVal;
12849     if (VD->getType()->isReferenceType())
12850       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12851                                            AlignmentSource::Decl);
12852     else
12853       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12854                                 AlignmentSource::Decl);
12855     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12856                                      D.getBeginLoc());
12857     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12858     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12859     // }
12860   }
12861 }
12862 
12863 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12864     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12865     SourceLocation Loc) {
12866   if (CGF.getLangOpts().OpenMP < 50)
12867     return;
12868   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12869   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12870          "Unknown lastprivate conditional variable.");
12871   StringRef UniqueName = It->second;
12872   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12873   // The variable was not updated in the region - exit.
12874   if (!GV)
12875     return;
12876   LValue LPLVal = CGF.MakeAddrLValue(
12877       Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
12878       PrivLVal.getType().getNonReferenceType());
12879   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12880   CGF.EmitStoreOfScalar(Res, PrivLVal);
12881 }
12882 
/// emitParallelOutlinedFunction is not supported in SIMD-only mode; control
/// must never reach this definition (it is marked with llvm_unreachable).
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12888 
/// emitTeamsOutlinedFunction is not supported in SIMD-only mode; control must
/// never reach this definition (it is marked with llvm_unreachable).
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12894 
/// emitTaskOutlinedFunction is not supported in SIMD-only mode; control must
/// never reach this definition (it is marked with llvm_unreachable).
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12902 
/// emitParallelCall is not supported in SIMD-only mode; control must never
/// reach this definition (it is marked with llvm_unreachable).
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12911 
/// emitCriticalRegion is not supported in SIMD-only mode; control must never
/// reach this definition (it is marked with llvm_unreachable).
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12918 
/// emitMasterRegion is not supported in SIMD-only mode; control must never
/// reach this definition (it is marked with llvm_unreachable).
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12924 
/// emitMaskedRegion is not supported in SIMD-only mode; control must never
/// reach this definition (it is marked with llvm_unreachable).
// NOTE(review): the region-codegen parameter is named MasterOpGen here,
// which looks like a leftover from emitMasterRegion - confirm against the
// declaration before renaming.
void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12931 
/// emitTaskyieldCall is not supported in SIMD-only mode; control must never
/// reach this definition (it is marked with llvm_unreachable).
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12936 
/// emitTaskgroupRegion is not supported in SIMD-only mode; control must never
/// reach this definition (it is marked with llvm_unreachable).
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12942 
/// emitSingleRegion is not supported in SIMD-only mode; control must never
/// reach this definition (it is marked with llvm_unreachable).
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12950 
/// emitOrderedRegion is not supported in SIMD-only mode; control must never
/// reach this definition (it is marked with llvm_unreachable).
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12957 
/// emitBarrierCall is not supported in SIMD-only mode; control must never
/// reach this definition (it is marked with llvm_unreachable).
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12965 
/// emitForDispatchInit is not supported in SIMD-only mode; control must never
/// reach this definition (it is marked with llvm_unreachable).
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12972 
/// emitForStaticInit is not supported in SIMD-only mode; control must never
/// reach this definition (it is marked with llvm_unreachable).
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12978 
/// emitDistributeStaticInit is not supported in SIMD-only mode; control must
/// never reach this definition (it is marked with llvm_unreachable).
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12984 
/// emitForOrderedIterationEnd is not supported in SIMD-only mode; control must
/// never reach this definition (it is marked with llvm_unreachable).
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12991 
/// emitForStaticFinish is not supported in SIMD-only mode; control must never
/// reach this definition (it is marked with llvm_unreachable).
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12997 
/// emitForNext is not supported in SIMD-only mode; control must never reach
/// this definition (it is marked with llvm_unreachable).
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13005 
/// emitNumThreadsClause is not supported in SIMD-only mode; control must never
/// reach this definition (it is marked with llvm_unreachable).
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13011 
/// emitProcBindClause is not supported in SIMD-only mode; control must never
/// reach this definition (it is marked with llvm_unreachable).
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13017 
/// getAddrOfThreadPrivate is not supported in SIMD-only mode; control must
/// never reach this definition (it is marked with llvm_unreachable).
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13024 
/// emitThreadPrivateVarDefinition is not supported in SIMD-only mode; control
/// must never reach this definition (it is marked with llvm_unreachable).
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13030 
/// getAddrOfArtificialThreadPrivate is not supported in SIMD-only mode;
/// control must never reach this definition (it is marked with
/// llvm_unreachable).
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13035 
/// emitFlush is not supported in SIMD-only mode; control must never reach
/// this definition (it is marked with llvm_unreachable).
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13042 
/// emitTaskCall is not supported in SIMD-only mode; control must never reach
/// this definition (it is marked with llvm_unreachable).
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13051 
/// emitTaskLoopCall is not supported in SIMD-only mode; control must never
/// reach this definition (it is marked with llvm_unreachable).
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13058 
/// Emits a reduction in SIMD-only mode by forwarding every argument unchanged
/// to CGOpenMPRuntime::emitReduction. Only simple reductions are expected
/// here: Options.SimpleReduction is asserted to be set.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
13067 
/// emitTaskReductionInit is not supported in SIMD-only mode; control must
/// never reach this definition (it is marked with llvm_unreachable).
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13073 
/// emitTaskReductionFini is not supported in SIMD-only mode; control must
/// never reach this definition (it is marked with llvm_unreachable).
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13079 
/// emitTaskReductionFixups is not supported in SIMD-only mode; control must
/// never reach this definition (it is marked with llvm_unreachable).
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13086 
/// getTaskReductionItem is not supported in SIMD-only mode; control must never
/// reach this definition (it is marked with llvm_unreachable).
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13093 
/// emitTaskwaitCall is not supported in SIMD-only mode; control must never
/// reach this definition (it is marked with llvm_unreachable).
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13099 
/// emitCancellationPointCall is not supported in SIMD-only mode; control must
/// never reach this definition (it is marked with llvm_unreachable).
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13105 
/// emitCancelCall is not supported in SIMD-only mode; control must never
/// reach this definition (it is marked with llvm_unreachable).
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13111 
/// emitTargetOutlinedFunction is not supported in SIMD-only mode; control must
/// never reach this definition (it is marked with llvm_unreachable).
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13118 
/// emitTargetCall is not supported in SIMD-only mode; control must never
/// reach this definition (it is marked with llvm_unreachable).
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13128 
/// emitTargetFunctions is not supported in SIMD-only mode; control must never
/// reach this definition (it is marked with llvm_unreachable).
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13132 
/// emitTargetGlobalVariable is not supported in SIMD-only mode; control must
/// never reach this definition (it is marked with llvm_unreachable).
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13136 
/// Unlike its neighbouring SIMD-only definitions this one is callable: it
/// ignores \p GD and always returns false.
// NOTE(review): presumably false tells the caller that the declaration was not
// handled as a target global - confirm against the base-class contract.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
13140 
/// emitTeamsCall is not supported in SIMD-only mode; control must never reach
/// this definition (it is marked with llvm_unreachable).
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13148 
/// emitNumTeamsClause is not supported in SIMD-only mode; control must never
/// reach this definition (it is marked with llvm_unreachable).
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13155 
/// emitTargetDataCalls is not supported in SIMD-only mode; control must never
/// reach this definition (it is marked with llvm_unreachable).
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13161 
/// emitTargetDataStandAloneCall is not supported in SIMD-only mode; control
/// must never reach this definition (it is marked with llvm_unreachable).
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13167 
/// emitDoacrossInit is not supported in SIMD-only mode; control must never
/// reach this definition (it is marked with llvm_unreachable).
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13173 
/// emitDoacrossOrdered is not supported in SIMD-only mode; control must never
/// reach this definition (it is marked with llvm_unreachable).
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13178 
/// translateParameter is not supported in SIMD-only mode; control must never
/// reach this definition (it is marked with llvm_unreachable).
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13184 
/// getParameterAddress is not supported in SIMD-only mode; control must never
/// reach this definition (it is marked with llvm_unreachable).
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13191