1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/APValue.h"
20 #include "clang/AST/Attr.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/AST/OpenMPClause.h"
23 #include "clang/AST/StmtOpenMP.h"
24 #include "clang/AST/StmtVisitor.h"
25 #include "clang/Basic/BitmaskEnum.h"
26 #include "clang/Basic/FileManager.h"
27 #include "clang/Basic/OpenMPKinds.h"
28 #include "clang/Basic/SourceManager.h"
29 #include "clang/CodeGen/ConstantInitBuilder.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/SetOperations.h"
32 #include "llvm/ADT/SmallBitVector.h"
33 #include "llvm/ADT/StringExtras.h"
34 #include "llvm/Bitcode/BitcodeReader.h"
35 #include "llvm/IR/Constants.h"
36 #include "llvm/IR/DerivedTypes.h"
37 #include "llvm/IR/GlobalValue.h"
38 #include "llvm/IR/InstrTypes.h"
39 #include "llvm/IR/Value.h"
40 #include "llvm/Support/AtomicOrdering.h"
41 #include "llvm/Support/Format.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include <cassert>
44 #include <numeric>
45 
46 using namespace clang;
47 using namespace CodeGen;
48 using namespace llvm::omp;
49 
50 namespace {
51 /// Base class for handling code generation inside OpenMP regions.
52 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
53 public:
54   /// Kinds of OpenMP regions used in codegen.
55   enum CGOpenMPRegionKind {
56     /// Region with outlined function for standalone 'parallel'
57     /// directive.
58     ParallelOutlinedRegion,
59     /// Region with outlined function for standalone 'task' directive.
60     TaskOutlinedRegion,
61     /// Region for constructs that do not require function outlining,
62     /// like 'for', 'sections', 'atomic' etc. directives.
63     InlinedRegion,
64     /// Region with outlined function for standalone 'target' directive.
65     TargetRegion,
66   };
67 
68   CGOpenMPRegionInfo(const CapturedStmt &CS,
69                      const CGOpenMPRegionKind RegionKind,
70                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
71                      bool HasCancel)
72       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
73         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
74 
75   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
76                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
77                      bool HasCancel)
78       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
79         Kind(Kind), HasCancel(HasCancel) {}
80 
81   /// Get a variable or parameter for storing global thread id
82   /// inside OpenMP construct.
83   virtual const VarDecl *getThreadIDVariable() const = 0;
84 
85   /// Emit the captured statement body.
86   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
87 
88   /// Get an LValue for the current ThreadID variable.
89   /// \return LValue for thread id variable. This LValue always has type int32*.
90   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
91 
92   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
93 
94   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
95 
96   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
97 
98   bool hasCancel() const { return HasCancel; }
99 
100   static bool classof(const CGCapturedStmtInfo *Info) {
101     return Info->getKind() == CR_OpenMP;
102   }
103 
104   ~CGOpenMPRegionInfo() override = default;
105 
106 protected:
107   CGOpenMPRegionKind RegionKind;
108   RegionCodeGenTy CodeGen;
109   OpenMPDirectiveKind Kind;
110   bool HasCancel;
111 };
112 
113 /// API for captured statement code generation in OpenMP constructs.
114 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
115 public:
116   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
117                              const RegionCodeGenTy &CodeGen,
118                              OpenMPDirectiveKind Kind, bool HasCancel,
119                              StringRef HelperName)
120       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
121                            HasCancel),
122         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
123     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
124   }
125 
126   /// Get a variable or parameter for storing global thread id
127   /// inside OpenMP construct.
128   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
129 
130   /// Get the name of the capture helper.
131   StringRef getHelperName() const override { return HelperName; }
132 
133   static bool classof(const CGCapturedStmtInfo *Info) {
134     return CGOpenMPRegionInfo::classof(Info) &&
135            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
136                ParallelOutlinedRegion;
137   }
138 
139 private:
140   /// A variable or parameter storing global thread id for OpenMP
141   /// constructs.
142   const VarDecl *ThreadIDVar;
143   StringRef HelperName;
144 };
145 
146 /// API for captured statement code generation in OpenMP constructs.
147 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
148 public:
149   class UntiedTaskActionTy final : public PrePostActionTy {
150     bool Untied;
151     const VarDecl *PartIDVar;
152     const RegionCodeGenTy UntiedCodeGen;
153     llvm::SwitchInst *UntiedSwitch = nullptr;
154 
155   public:
156     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
157                        const RegionCodeGenTy &UntiedCodeGen)
158         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
159     void Enter(CodeGenFunction &CGF) override {
160       if (Untied) {
161         // Emit task switching point.
162         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
163             CGF.GetAddrOfLocalVar(PartIDVar),
164             PartIDVar->getType()->castAs<PointerType>());
165         llvm::Value *Res =
166             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
167         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
168         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
169         CGF.EmitBlock(DoneBB);
170         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
171         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
172         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
173                               CGF.Builder.GetInsertBlock());
174         emitUntiedSwitch(CGF);
175       }
176     }
177     void emitUntiedSwitch(CodeGenFunction &CGF) const {
178       if (Untied) {
179         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
180             CGF.GetAddrOfLocalVar(PartIDVar),
181             PartIDVar->getType()->castAs<PointerType>());
182         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
183                               PartIdLVal);
184         UntiedCodeGen(CGF);
185         CodeGenFunction::JumpDest CurPoint =
186             CGF.getJumpDestInCurrentScope(".untied.next.");
187         CGF.EmitBranch(CGF.ReturnBlock.getBlock());
188         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
189         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
190                               CGF.Builder.GetInsertBlock());
191         CGF.EmitBranchThroughCleanup(CurPoint);
192         CGF.EmitBlock(CurPoint.getBlock());
193       }
194     }
195     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
196   };
197   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
198                                  const VarDecl *ThreadIDVar,
199                                  const RegionCodeGenTy &CodeGen,
200                                  OpenMPDirectiveKind Kind, bool HasCancel,
201                                  const UntiedTaskActionTy &Action)
202       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
203         ThreadIDVar(ThreadIDVar), Action(Action) {
204     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
205   }
206 
207   /// Get a variable or parameter for storing global thread id
208   /// inside OpenMP construct.
209   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
210 
211   /// Get an LValue for the current ThreadID variable.
212   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
213 
214   /// Get the name of the capture helper.
215   StringRef getHelperName() const override { return ".omp_outlined."; }
216 
217   void emitUntiedSwitch(CodeGenFunction &CGF) override {
218     Action.emitUntiedSwitch(CGF);
219   }
220 
221   static bool classof(const CGCapturedStmtInfo *Info) {
222     return CGOpenMPRegionInfo::classof(Info) &&
223            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
224                TaskOutlinedRegion;
225   }
226 
227 private:
228   /// A variable or parameter storing global thread id for OpenMP
229   /// constructs.
230   const VarDecl *ThreadIDVar;
231   /// Action for emitting code for untied tasks.
232   const UntiedTaskActionTy &Action;
233 };
234 
235 /// API for inlined captured statement code generation in OpenMP
236 /// constructs.
237 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
238 public:
239   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
240                             const RegionCodeGenTy &CodeGen,
241                             OpenMPDirectiveKind Kind, bool HasCancel)
242       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
243         OldCSI(OldCSI),
244         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
245 
246   // Retrieve the value of the context parameter.
247   llvm::Value *getContextValue() const override {
248     if (OuterRegionInfo)
249       return OuterRegionInfo->getContextValue();
250     llvm_unreachable("No context value for inlined OpenMP region");
251   }
252 
253   void setContextValue(llvm::Value *V) override {
254     if (OuterRegionInfo) {
255       OuterRegionInfo->setContextValue(V);
256       return;
257     }
258     llvm_unreachable("No context value for inlined OpenMP region");
259   }
260 
261   /// Lookup the captured field decl for a variable.
262   const FieldDecl *lookup(const VarDecl *VD) const override {
263     if (OuterRegionInfo)
264       return OuterRegionInfo->lookup(VD);
265     // If there is no outer outlined region,no need to lookup in a list of
266     // captured variables, we can use the original one.
267     return nullptr;
268   }
269 
270   FieldDecl *getThisFieldDecl() const override {
271     if (OuterRegionInfo)
272       return OuterRegionInfo->getThisFieldDecl();
273     return nullptr;
274   }
275 
276   /// Get a variable or parameter for storing global thread id
277   /// inside OpenMP construct.
278   const VarDecl *getThreadIDVariable() const override {
279     if (OuterRegionInfo)
280       return OuterRegionInfo->getThreadIDVariable();
281     return nullptr;
282   }
283 
284   /// Get an LValue for the current ThreadID variable.
285   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
286     if (OuterRegionInfo)
287       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
288     llvm_unreachable("No LValue for inlined OpenMP construct");
289   }
290 
291   /// Get the name of the capture helper.
292   StringRef getHelperName() const override {
293     if (auto *OuterRegionInfo = getOldCSI())
294       return OuterRegionInfo->getHelperName();
295     llvm_unreachable("No helper name for inlined OpenMP construct");
296   }
297 
298   void emitUntiedSwitch(CodeGenFunction &CGF) override {
299     if (OuterRegionInfo)
300       OuterRegionInfo->emitUntiedSwitch(CGF);
301   }
302 
303   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
304 
305   static bool classof(const CGCapturedStmtInfo *Info) {
306     return CGOpenMPRegionInfo::classof(Info) &&
307            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
308   }
309 
310   ~CGOpenMPInlinedRegionInfo() override = default;
311 
312 private:
313   /// CodeGen info about outer OpenMP region.
314   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
315   CGOpenMPRegionInfo *OuterRegionInfo;
316 };
317 
318 /// API for captured statement code generation in OpenMP target
319 /// constructs. For this captures, implicit parameters are used instead of the
320 /// captured fields. The name of the target region has to be unique in a given
321 /// application so it is provided by the client, because only the client has
322 /// the information to generate that.
323 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
324 public:
325   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
326                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
327       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
328                            /*HasCancel=*/false),
329         HelperName(HelperName) {}
330 
331   /// This is unused for target regions because each starts executing
332   /// with a single thread.
333   const VarDecl *getThreadIDVariable() const override { return nullptr; }
334 
335   /// Get the name of the capture helper.
336   StringRef getHelperName() const override { return HelperName; }
337 
338   static bool classof(const CGCapturedStmtInfo *Info) {
339     return CGOpenMPRegionInfo::classof(Info) &&
340            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
341   }
342 
343 private:
344   StringRef HelperName;
345 };
346 
/// Placeholder region code generation callback for contexts that need a
/// callback object but must never run it; invoking it is a programming error.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
350 /// API for generation of expressions captured in a innermost OpenMP
351 /// region.
352 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
353 public:
354   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
355       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
356                                   OMPD_unknown,
357                                   /*HasCancel=*/false),
358         PrivScope(CGF) {
359     // Make sure the globals captured in the provided statement are local by
360     // using the privatization logic. We assume the same variable is not
361     // captured more than once.
362     for (const auto &C : CS.captures()) {
363       if (!C.capturesVariable() && !C.capturesVariableByCopy())
364         continue;
365 
366       const VarDecl *VD = C.getCapturedVar();
367       if (VD->isLocalVarDeclOrParm())
368         continue;
369 
370       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
371                       /*RefersToEnclosingVariableOrCapture=*/false,
372                       VD->getType().getNonReferenceType(), VK_LValue,
373                       C.getLocation());
374       PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
375     }
376     (void)PrivScope.Privatize();
377   }
378 
379   /// Lookup the captured field decl for a variable.
380   const FieldDecl *lookup(const VarDecl *VD) const override {
381     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
382       return FD;
383     return nullptr;
384   }
385 
386   /// Emit the captured statement body.
387   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
388     llvm_unreachable("No body for expressions");
389   }
390 
391   /// Get a variable or parameter for storing global thread id
392   /// inside OpenMP construct.
393   const VarDecl *getThreadIDVariable() const override {
394     llvm_unreachable("No thread id for expressions");
395   }
396 
397   /// Get the name of the capture helper.
398   StringRef getHelperName() const override {
399     llvm_unreachable("No helper name for expressions");
400   }
401 
402   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
403 
404 private:
405   /// Private scope to capture global variables.
406   CodeGenFunction::OMPPrivateScope PrivScope;
407 };
408 
409 /// RAII for emitting code of OpenMP constructs.
410 class InlinedOpenMPRegionRAII {
411   CodeGenFunction &CGF;
412   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
413   FieldDecl *LambdaThisCaptureField = nullptr;
414   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
415   bool NoInheritance = false;
416 
417 public:
418   /// Constructs region for combined constructs.
419   /// \param CodeGen Code generation sequence for combined directives. Includes
420   /// a list of functions used for code generation of implicitly inlined
421   /// regions.
422   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
423                           OpenMPDirectiveKind Kind, bool HasCancel,
424                           bool NoInheritance = true)
425       : CGF(CGF), NoInheritance(NoInheritance) {
426     // Start emission for the construct.
427     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
428         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
429     if (NoInheritance) {
430       std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
431       LambdaThisCaptureField = CGF.LambdaThisCaptureField;
432       CGF.LambdaThisCaptureField = nullptr;
433       BlockInfo = CGF.BlockInfo;
434       CGF.BlockInfo = nullptr;
435     }
436   }
437 
438   ~InlinedOpenMPRegionRAII() {
439     // Restore original CapturedStmtInfo only if we're done with code emission.
440     auto *OldCSI =
441         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
442     delete CGF.CapturedStmtInfo;
443     CGF.CapturedStmtInfo = OldCSI;
444     if (NoInheritance) {
445       std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
446       CGF.LambdaThisCaptureField = LambdaThisCaptureField;
447       CGF.BlockInfo = BlockInfo;
448     }
449   }
450 };
451 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerators are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive. Same value as
  /// OMP_IDENT_BARRIER_IMPL.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive. Includes the
  /// OMP_IDENT_BARRIER_IMPL bit (0x40 | 0x80).
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive. Includes the
  /// OMP_IDENT_BARRIER_IMPL bit (0x40 | 0x100).
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  // Marks this enum as a bitmask enum; the argument names the largest
  // individual enumerator value.
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
480 
// Nested anonymous namespace so that the bitmask-enum operators enabled below
// are scoped to the enums declared inside it.
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// Flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// No requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  // Marks this enum as a bitmask enum; the argument names the largest
  // individual enumerator value.
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Device id values with a reserved meaning.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
506 
507 /// Describes ident structure that describes a source location.
508 /// All descriptions are taken from
509 /// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
510 /// Original structure:
511 /// typedef struct ident {
512 ///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
513 ///                                  see above  */
514 ///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
515 ///                                  KMP_IDENT_KMPC identifies this union
516 ///                                  member  */
517 ///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
518 ///                                  see above */
519 ///#if USE_ITT_BUILD
520 ///                            /*  but currently used for storing
521 ///                                region-specific ITT */
522 ///                            /*  contextual information. */
523 ///#endif /* USE_ITT_BUILD */
524 ///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
525 ///                                 C++  */
526 ///    char const *psource;    /**< String describing the source location.
527 ///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
529 ///                            the function and a pair of line numbers that
530 ///                            delimit the construct.
531 ///                             */
532 /// } ident_t;
/// Indices of the fields of the ident_t structure, in declaration order. The
/// values are spelled out so the mapping to field positions is explicit.
enum IdentFieldIndex {
  /// reserved_1: might be used in Fortran.
  IdentField_Reserved_1 = 0,
  /// flags: OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags = 1,
  /// reserved_2: not really used in Fortran any more.
  IdentField_Reserved_2 = 2,
  /// reserved_3: source[4] in Fortran, do not use for C++.
  IdentField_Reserved_3 = 3,
  /// psource: string describing the source location. The string is composed
  /// of semi-colon separated fields which describe the source file, the
  /// function and a pair of line numbers that delimit the construct.
  IdentField_PSource = 4
};
547 
/// Schedule kinds passed to the runtime for 'omp for' loops. The enumerators
/// and their values are taken from the enum sched_type in kmp.h.
enum OpenMPSchedType {
  // Default (unordered) versions.

  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// Static with chunk adjustment (e.g., simd).
  OMP_sch_static_balanced_chunked = 45,
  /// Schedule used by default; an alias of OMP_sch_static.
  OMP_sch_default = OMP_sch_static,

  // 'ordered' versions.

  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,

  // dist_schedule types.

  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,

  // Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.

  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = 1 << 29,
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = 1 << 30,
};
579 
580 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
581 /// region.
582 class CleanupTy final : public EHScopeStack::Cleanup {
583   PrePostActionTy *Action;
584 
585 public:
586   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
587   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
588     if (!CGF.HaveInsertPoint())
589       return;
590     Action->Exit(CGF);
591   }
592 };
593 
594 } // anonymous namespace
595 
596 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
597   CodeGenFunction::RunCleanupsScope Scope(CGF);
598   if (PrePostAction) {
599     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
600     Callback(CodeGen, CGF, *PrePostAction);
601   } else {
602     PrePostActionTy Action;
603     Callback(CodeGen, CGF, Action);
604   }
605 }
606 
607 /// Check if the combiner is a call to UDR combiner and if it is so return the
608 /// UDR decl used for reduction.
609 static const OMPDeclareReductionDecl *
610 getReductionInit(const Expr *ReductionOp) {
611   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
612     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
613       if (const auto *DRE =
614               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
615         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
616           return DRD;
617   return nullptr;
618 }
619 
620 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
621                                              const OMPDeclareReductionDecl *DRD,
622                                              const Expr *InitOp,
623                                              Address Private, Address Original,
624                                              QualType Ty) {
625   if (DRD->getInitializer()) {
626     std::pair<llvm::Function *, llvm::Function *> Reduction =
627         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
628     const auto *CE = cast<CallExpr>(InitOp);
629     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
630     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
631     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
632     const auto *LHSDRE =
633         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
634     const auto *RHSDRE =
635         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
636     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
637     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
638     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
639     (void)PrivateScope.Privatize();
640     RValue Func = RValue::get(Reduction.second);
641     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
642     CGF.EmitIgnoredExpr(InitOp);
643   } else {
644     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
645     std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
646     auto *GV = new llvm::GlobalVariable(
647         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
648         llvm::GlobalValue::PrivateLinkage, Init, Name);
649     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
650     RValue InitRVal;
651     switch (CGF.getEvaluationKind(Ty)) {
652     case TEK_Scalar:
653       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
654       break;
655     case TEK_Complex:
656       InitRVal =
657           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
658       break;
659     case TEK_Aggregate: {
660       OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
661       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
662       CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
663                            /*IsInitializer=*/false);
664       return;
665     }
666     }
667     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
668     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
669     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
670                          /*IsInitializer=*/false);
671   }
672 }
673 
674 /// Emit initialization of arrays of complex types.
675 /// \param DestAddr Address of the array.
676 /// \param Type Type of array.
677 /// \param Init Initial expression of array.
678 /// \param SrcAddr Address of the original array.
679 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
680                                  QualType Type, bool EmitDeclareReductionInit,
681                                  const Expr *Init,
682                                  const OMPDeclareReductionDecl *DRD,
683                                  Address SrcAddr = Address::invalid()) {
684   // Perform element-by-element initialization.
685   QualType ElementTy;
686 
687   // Drill down to the base element type on both arrays.
688   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
689   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
690   if (DRD)
691     SrcAddr =
692         CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
693 
694   llvm::Value *SrcBegin = nullptr;
695   if (DRD)
696     SrcBegin = SrcAddr.getPointer();
697   llvm::Value *DestBegin = DestAddr.getPointer();
698   // Cast from pointer to array type to pointer to single element.
699   llvm::Value *DestEnd =
700       CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
701   // The basic structure here is a while-do loop.
702   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
703   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
704   llvm::Value *IsEmpty =
705       CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
706   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
707 
708   // Enter the loop body, making that address the current address.
709   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
710   CGF.EmitBlock(BodyBB);
711 
712   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
713 
714   llvm::PHINode *SrcElementPHI = nullptr;
715   Address SrcElementCurrent = Address::invalid();
716   if (DRD) {
717     SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
718                                           "omp.arraycpy.srcElementPast");
719     SrcElementPHI->addIncoming(SrcBegin, EntryBB);
720     SrcElementCurrent =
721         Address(SrcElementPHI, SrcAddr.getElementType(),
722                 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
723   }
724   llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
725       DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
726   DestElementPHI->addIncoming(DestBegin, EntryBB);
727   Address DestElementCurrent =
728       Address(DestElementPHI, DestAddr.getElementType(),
729               DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
730 
731   // Emit copy.
732   {
733     CodeGenFunction::RunCleanupsScope InitScope(CGF);
734     if (EmitDeclareReductionInit) {
735       emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
736                                        SrcElementCurrent, ElementTy);
737     } else
738       CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
739                            /*IsInitializer=*/false);
740   }
741 
742   if (DRD) {
743     // Shift the address forward by one element.
744     llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
745         SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
746         "omp.arraycpy.dest.element");
747     SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
748   }
749 
750   // Shift the address forward by one element.
751   llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
752       DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
753       "omp.arraycpy.dest.element");
754   // Check whether we've reached the end.
755   llvm::Value *Done =
756       CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
757   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
758   DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
759 
760   // Done.
761   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
762 }
763 
764 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
765   return CGF.EmitOMPSharedLValue(E);
766 }
767 
768 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
769                                             const Expr *E) {
770   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
771     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
772   return LValue();
773 }
774 
775 void ReductionCodeGen::emitAggregateInitialization(
776     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
777     const OMPDeclareReductionDecl *DRD) {
778   // Emit VarDecl with copy init for arrays.
779   // Get the address of the original variable captured in current
780   // captured region.
781   const auto *PrivateVD =
782       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
783   bool EmitDeclareReductionInit =
784       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
785   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
786                        EmitDeclareReductionInit,
787                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
788                                                 : PrivateVD->getInit(),
789                        DRD, SharedAddr);
790 }
791 
792 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
793                                    ArrayRef<const Expr *> Origs,
794                                    ArrayRef<const Expr *> Privates,
795                                    ArrayRef<const Expr *> ReductionOps) {
796   ClausesData.reserve(Shareds.size());
797   SharedAddresses.reserve(Shareds.size());
798   Sizes.reserve(Shareds.size());
799   BaseDecls.reserve(Shareds.size());
800   const auto *IOrig = Origs.begin();
801   const auto *IPriv = Privates.begin();
802   const auto *IRed = ReductionOps.begin();
803   for (const Expr *Ref : Shareds) {
804     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
805     std::advance(IOrig, 1);
806     std::advance(IPriv, 1);
807     std::advance(IRed, 1);
808   }
809 }
810 
811 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
812   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
813          "Number of generated lvalues must be exactly N.");
814   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
815   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
816   SharedAddresses.emplace_back(First, Second);
817   if (ClausesData[N].Shared == ClausesData[N].Ref) {
818     OrigAddresses.emplace_back(First, Second);
819   } else {
820     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
821     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
822     OrigAddresses.emplace_back(First, Second);
823   }
824 }
825 
826 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
827   QualType PrivateType = getPrivateType(N);
828   bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
829   if (!PrivateType->isVariablyModifiedType()) {
830     Sizes.emplace_back(
831         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
832         nullptr);
833     return;
834   }
835   llvm::Value *Size;
836   llvm::Value *SizeInChars;
837   auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
838   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
839   if (AsArraySection) {
840     Size = CGF.Builder.CreatePtrDiff(ElemType,
841                                      OrigAddresses[N].second.getPointer(CGF),
842                                      OrigAddresses[N].first.getPointer(CGF));
843     Size = CGF.Builder.CreateNUWAdd(
844         Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
845     SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
846   } else {
847     SizeInChars =
848         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
849     Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
850   }
851   Sizes.emplace_back(SizeInChars, Size);
852   CodeGenFunction::OpaqueValueMapping OpaqueMap(
853       CGF,
854       cast<OpaqueValueExpr>(
855           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
856       RValue::get(Size));
857   CGF.EmitVariablyModifiedType(PrivateType);
858 }
859 
860 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
861                                          llvm::Value *Size) {
862   QualType PrivateType = getPrivateType(N);
863   if (!PrivateType->isVariablyModifiedType()) {
864     assert(!Size && !Sizes[N].second &&
865            "Size should be nullptr for non-variably modified reduction "
866            "items.");
867     return;
868   }
869   CodeGenFunction::OpaqueValueMapping OpaqueMap(
870       CGF,
871       cast<OpaqueValueExpr>(
872           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
873       RValue::get(Size));
874   CGF.EmitVariablyModifiedType(PrivateType);
875 }
876 
877 void ReductionCodeGen::emitInitialization(
878     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
879     llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
880   assert(SharedAddresses.size() > N && "No variable was generated");
881   const auto *PrivateVD =
882       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
883   const OMPDeclareReductionDecl *DRD =
884       getReductionInit(ClausesData[N].ReductionOp);
885   if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
886     if (DRD && DRD->getInitializer())
887       (void)DefaultInit(CGF);
888     emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
889   } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
890     (void)DefaultInit(CGF);
891     QualType SharedType = SharedAddresses[N].first.getType();
892     emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
893                                      PrivateAddr, SharedAddr, SharedType);
894   } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
895              !CGF.isTrivialInitializer(PrivateVD->getInit())) {
896     CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
897                          PrivateVD->getType().getQualifiers(),
898                          /*IsInitializer=*/false);
899   }
900 }
901 
902 bool ReductionCodeGen::needCleanups(unsigned N) {
903   QualType PrivateType = getPrivateType(N);
904   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
905   return DTorKind != QualType::DK_none;
906 }
907 
908 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
909                                     Address PrivateAddr) {
910   QualType PrivateType = getPrivateType(N);
911   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
912   if (needCleanups(N)) {
913     PrivateAddr = CGF.Builder.CreateElementBitCast(
914         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
915     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
916   }
917 }
918 
919 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
920                           LValue BaseLV) {
921   BaseTy = BaseTy.getNonReferenceType();
922   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
923          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
924     if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
925       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
926     } else {
927       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
928       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
929     }
930     BaseTy = BaseTy->getPointeeType();
931   }
932   return CGF.MakeAddrLValue(
933       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
934                                        CGF.ConvertTypeForMem(ElTy)),
935       BaseLV.getType(), BaseLV.getBaseInfo(),
936       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
937 }
938 
/// Wraps the pointer \p Addr so that it can be accessed through a base of type
/// \p BaseTy.
///
/// For every pointer/reference level of \p BaseTy that has to be peeled off
/// before reaching \p ElTy, a memory temporary is created; each temporary
/// stores the pointer to the next (inner) temporary and the innermost one
/// stores \p Addr. The outermost temporary is returned. If no temporary is
/// needed, \p Addr is cast to the pointer type of \p OriginalBaseAddress and
/// returned as that address with its pointer replaced.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  // Tmp: most recently created temporary (innermost so far).
  // TopTmp: temporary created in the previous iteration.
  // MostTopTmp: first (outermost) temporary, which is what gets returned.
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    // Link the previous level to the new temporary, or remember the new
    // temporary as the head of the chain if it is the first one.
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  // At least one level of indirection was materialized: store the adjusted
  // pointer into the innermost temporary and hand back the outermost one.
  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  // No indirection needed: reuse the original base address with the new
  // pointer value.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr);
}
967 
968 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
969   const VarDecl *OrigVD = nullptr;
970   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
971     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
972     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
973       Base = TempOASE->getBase()->IgnoreParenImpCasts();
974     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
975       Base = TempASE->getBase()->IgnoreParenImpCasts();
976     DE = cast<DeclRefExpr>(Base);
977     OrigVD = cast<VarDecl>(DE->getDecl());
978   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
979     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
980     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
981       Base = TempASE->getBase()->IgnoreParenImpCasts();
982     DE = cast<DeclRefExpr>(Base);
983     OrigVD = cast<VarDecl>(DE->getDecl());
984   }
985   return OrigVD;
986 }
987 
988 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
989                                                Address PrivateAddr) {
990   const DeclRefExpr *DE;
991   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
992     BaseDecls.emplace_back(OrigVD);
993     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
994     LValue BaseLValue =
995         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
996                     OriginalBaseLValue);
997     Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
998     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
999         SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
1000         SharedAddr.getPointer());
1001     llvm::Value *PrivatePointer =
1002         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1003             PrivateAddr.getPointer(), SharedAddr.getType());
1004     llvm::Value *Ptr = CGF.Builder.CreateGEP(
1005         SharedAddr.getElementType(), PrivatePointer, Adjustment);
1006     return castToBase(CGF, OrigVD->getType(),
1007                       SharedAddresses[N].first.getType(),
1008                       OriginalBaseLValue.getAddress(CGF), Ptr);
1009   }
1010   BaseDecls.emplace_back(
1011       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1012   return PrivateAddr;
1013 }
1014 
1015 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1016   const OMPDeclareReductionDecl *DRD =
1017       getReductionInit(ClausesData[N].ReductionOp);
1018   return DRD && DRD->getInitializer();
1019 }
1020 
1021 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1022   return CGF.EmitLoadOfPointerLValue(
1023       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1024       getThreadIDVariable()->getType()->castAs<PointerType>());
1025 }
1026 
1027 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
1028   if (!CGF.HaveInsertPoint())
1029     return;
1030   // 1.2.2 OpenMP Language Terminology
1031   // Structured block - An executable statement with a single entry at the
1032   // top and a single exit at the bottom.
1033   // The point of exit cannot be a branch out of the structured block.
1034   // longjmp() and throw() must not violate the entry/exit criteria.
1035   CGF.EHStack.pushTerminate();
1036   if (S)
1037     CGF.incrementProfileCounter(S);
1038   CodeGen(CGF);
1039   CGF.EHStack.popTerminate();
1040 }
1041 
1042 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1043     CodeGenFunction &CGF) {
1044   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1045                             getThreadIDVariable()->getType(),
1046                             AlignmentSource::Decl);
1047 }
1048 
1049 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1050                                        QualType FieldTy) {
1051   auto *Field = FieldDecl::Create(
1052       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1053       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1054       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1055   Field->setAccess(AS_public);
1056   DC->addDecl(Field);
1057   return Field;
1058 }
1059 
1060 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1061                                  StringRef Separator)
1062     : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1063       OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
1064   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1065 
1066   // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
1067   OMPBuilder.initialize();
1068   loadOffloadInfoMetadata();
1069 }
1070 
1071 void CGOpenMPRuntime::clear() {
1072   InternalVars.clear();
1073   // Clean non-target variable declarations possibly used only in debug info.
1074   for (const auto &Data : EmittedNonTargetVariables) {
1075     if (!Data.getValue().pointsToAliveValue())
1076       continue;
1077     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1078     if (!GV)
1079       continue;
1080     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1081       continue;
1082     GV->eraseFromParent();
1083   }
1084 }
1085 
1086 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1087   SmallString<128> Buffer;
1088   llvm::raw_svector_ostream OS(Buffer);
1089   StringRef Sep = FirstSeparator;
1090   for (StringRef Part : Parts) {
1091     OS << Sep << Part;
1092     Sep = Separator;
1093   }
1094   return std::string(OS.str());
1095 }
1096 
/// Emits an internal helper function for a user-defined reduction: either the
/// combiner (\p IsCombiner is true) or the initializer.
///
/// The function takes two restrict-qualified pointers to \p Ty; the first
/// parameter is mapped to \p Out and the second one to \p In. Inside the body
/// the variables \p In and \p Out are privatized to the pointees of these
/// parameters, so \p CombinerInitializer (if any) operates on the caller's
/// objects.
///
/// \param CombinerInitializer Expression to emit in the body; may be null.
/// \param In  Variable bound to the pointee of the second parameter.
/// \param Out Variable bound to the pointee of the first parameter. For an
///            initializer, a non-trivial initializer of \p Out is emitted
///            before \p CombinerInitializer.
/// \returns The newly created function.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  // Parameter order: out first, then in.
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The name is built from "omp_combiner"/"omp_initializer" followed by an
  // empty part, so it ends with a trailing separator.
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // When optimizing, request that the helper is always inlined.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress(CGF));
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress(CGF));
  (void)Scope.Privatize();
  // For initializers, first run the non-trivial initializer of the Out
  // variable (if there is one) directly into the privatized storage.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  // The expression is evaluated only for its side effects.
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1151 
1152 void CGOpenMPRuntime::emitUserDefinedReduction(
1153     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1154   if (UDRMap.count(D) > 0)
1155     return;
1156   llvm::Function *Combiner = emitCombinerOrInitializer(
1157       CGM, D->getType(), D->getCombiner(),
1158       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1159       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1160       /*IsCombiner=*/true);
1161   llvm::Function *Initializer = nullptr;
1162   if (const Expr *Init = D->getInitializer()) {
1163     Initializer = emitCombinerOrInitializer(
1164         CGM, D->getType(),
1165         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1166                                                                      : nullptr,
1167         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1168         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1169         /*IsCombiner=*/false);
1170   }
1171   UDRMap.try_emplace(D, Combiner, Initializer);
1172   if (CGF) {
1173     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1174     Decls.second.push_back(D);
1175   }
1176 }
1177 
1178 std::pair<llvm::Function *, llvm::Function *>
1179 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1180   auto I = UDRMap.find(D);
1181   if (I != UDRMap.end())
1182     return I->second;
1183   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1184   return UDRMap.lookup(D);
1185 }
1186 
1187 namespace {
1188 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1189 // Builder if one is present.
1190 struct PushAndPopStackRAII {
1191   PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1192                       bool HasCancel, llvm::omp::Directive Kind)
1193       : OMPBuilder(OMPBuilder) {
1194     if (!OMPBuilder)
1195       return;
1196 
1197     // The following callback is the crucial part of clangs cleanup process.
1198     //
1199     // NOTE:
1200     // Once the OpenMPIRBuilder is used to create parallel regions (and
1201     // similar), the cancellation destination (Dest below) is determined via
1202     // IP. That means if we have variables to finalize we split the block at IP,
1203     // use the new block (=BB) as destination to build a JumpDest (via
1204     // getJumpDestInCurrentScope(BB)) which then is fed to
1205     // EmitBranchThroughCleanup. Furthermore, there will not be the need
1206     // to push & pop an FinalizationInfo object.
1207     // The FiniCB will still be needed but at the point where the
1208     // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1209     auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1210       assert(IP.getBlock()->end() == IP.getPoint() &&
1211              "Clang CG should cause non-terminated block!");
1212       CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1213       CGF.Builder.restoreIP(IP);
1214       CodeGenFunction::JumpDest Dest =
1215           CGF.getOMPCancelDestination(OMPD_parallel);
1216       CGF.EmitBranchThroughCleanup(Dest);
1217     };
1218 
1219     // TODO: Remove this once we emit parallel regions through the
1220     //       OpenMPIRBuilder as it can do this setup internally.
1221     llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
1222     OMPBuilder->pushFinalizationCB(std::move(FI));
1223   }
1224   ~PushAndPopStackRAII() {
1225     if (OMPBuilder)
1226       OMPBuilder->popFinalizationCB();
1227   }
1228   llvm::OpenMPIRBuilder *OMPBuilder;
1229 };
1230 } // namespace
1231 
1232 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1233     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1234     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1235     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1236   assert(ThreadIDVar->getType()->isPointerType() &&
1237          "thread id variable must be of type kmp_int32 *");
1238   CodeGenFunction CGF(CGM, true);
1239   bool HasCancel = false;
1240   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1241     HasCancel = OPD->hasCancel();
1242   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1243     HasCancel = OPD->hasCancel();
1244   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1245     HasCancel = OPSD->hasCancel();
1246   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1247     HasCancel = OPFD->hasCancel();
1248   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1249     HasCancel = OPFD->hasCancel();
1250   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1251     HasCancel = OPFD->hasCancel();
1252   else if (const auto *OPFD =
1253                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1254     HasCancel = OPFD->hasCancel();
1255   else if (const auto *OPFD =
1256                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1257     HasCancel = OPFD->hasCancel();
1258 
1259   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1260   //       parallel region to make cancellation barriers work properly.
1261   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1262   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1263   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1264                                     HasCancel, OutlinedHelperName);
1265   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1266   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1267 }
1268 
1269 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1270     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1271     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1272   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1273   return emitParallelOrTeamsOutlinedFunction(
1274       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1275 }
1276 
1277 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1278     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1279     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1280   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1281   return emitParallelOrTeamsOutlinedFunction(
1282       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1283 }
1284 
/// Emits the outlined function for the task (or taskloop) region of directive
/// \p D.
///
/// \param ThreadIDVar   Thread id variable; must not have pointer type.
/// \param PartIDVar     Variable passed to the untied task action.
/// \param TaskTVar      Pointer-typed variable whose loaded value is passed as
///                      the task argument to __kmpc_omp_task.
/// \param Tied          Whether the task is tied.
/// \param NumberOfParts Set to the number of parts reported by the untied
///                      task action; only written when \p Tied is false.
/// \returns The generated outlined function.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Code generator handed to the untied task action: emits a call to
  // __kmpc_omp_task(loc, thread id, task pointer loaded from TaskTVar).
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  // Attach the action to the region code generator before the body is emitted.
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Taskloop-based directives capture under OMPD_taskloop, all others under
  // OMPD_task.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Only the directive classes listed here are queried for a cancel;
  // everything else is treated as having none.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // The part count is queried only after the body has been generated.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1331 
1332 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1333                              const RecordDecl *RD, const CGRecordLayout &RL,
1334                              ArrayRef<llvm::Constant *> Data) {
1335   llvm::StructType *StructTy = RL.getLLVMType();
1336   unsigned PrevIdx = 0;
1337   ConstantInitBuilder CIBuilder(CGM);
1338   const auto *DI = Data.begin();
1339   for (const FieldDecl *FD : RD->fields()) {
1340     unsigned Idx = RL.getLLVMFieldNo(FD);
1341     // Fill the alignment.
1342     for (unsigned I = PrevIdx; I < Idx; ++I)
1343       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1344     PrevIdx = Idx + 1;
1345     Fields.add(*DI);
1346     ++DI;
1347   }
1348 }
1349 
1350 template <class... As>
1351 static llvm::GlobalVariable *
1352 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1353                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1354                    As &&... Args) {
1355   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1356   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1357   ConstantInitBuilder CIBuilder(CGM);
1358   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1359   buildStructValue(Fields, CGM, RD, RL, Data);
1360   return Fields.finishAndCreateGlobal(
1361       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1362       std::forward<As>(Args)...);
1363 }
1364 
1365 template <typename T>
1366 static void
1367 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1368                                          ArrayRef<llvm::Constant *> Data,
1369                                          T &Parent) {
1370   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1371   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1372   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1373   buildStructValue(Fields, CGM, RD, RL, Data);
1374   Fields.finishAndAddTo(Parent);
1375 }
1376 
1377 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1378                                              bool AtCurrentPoint) {
1379   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1380   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1381 
1382   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1383   if (AtCurrentPoint) {
1384     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1385         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1386   } else {
1387     Elem.second.ServiceInsertPt =
1388         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1389     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1390   }
1391 }
1392 
1393 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1394   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1395   if (Elem.second.ServiceInsertPt) {
1396     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1397     Elem.second.ServiceInsertPt = nullptr;
1398     Ptr->eraseFromParent();
1399   }
1400 }
1401 
1402 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1403                                                   SourceLocation Loc,
1404                                                   SmallString<128> &Buffer) {
1405   llvm::raw_svector_ostream OS(Buffer);
1406   // Build debug location
1407   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1408   OS << ";" << PLoc.getFilename() << ";";
1409   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1410     OS << FD->getQualifiedNameAsString();
1411   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1412   return OS.str();
1413 }
1414 
1415 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1416                                                  SourceLocation Loc,
1417                                                  unsigned Flags) {
1418   uint32_t SrcLocStrSize;
1419   llvm::Constant *SrcLocStr;
1420   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1421       Loc.isInvalid()) {
1422     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1423   } else {
1424     std::string FunctionName;
1425     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1426       FunctionName = FD->getQualifiedNameAsString();
1427     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1428     const char *FileName = PLoc.getFilename();
1429     unsigned Line = PLoc.getLine();
1430     unsigned Column = PLoc.getColumn();
1431     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1432                                                 Column, SrcLocStrSize);
1433   }
1434   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1435   return OMPBuilder.getOrCreateIdent(
1436       SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1437 }
1438 
/// Return a value holding the OpenMP global thread id for use in the current
/// function of \p CGF.
///
/// Three strategies are tried in order:
///  1. With the OpenMPIRBuilder enabled, delegate to the builder.
///  2. Reuse a thread id already cached for this function, or load it from the
///     region's thread-id variable when one is available.
///  3. Otherwise emit a call to __kmpc_global_thread_num at the function's
///     service insertion point and cache the result.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Load from the thread-id variable when any of the following holds: no
      // landing pad is required, (C++) exceptions are disabled, we are
      // emitting into the entry block, the pointer is not an instruction, or
      // the pointer is defined in the entry block or in the current block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call
  // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insertion point; the guard restores the
  // builder's previous insertion point when this function returns.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1507 
1508 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1509   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1510   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1511     clearLocThreadIdInsertPt(CGF);
1512     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1513   }
1514   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1515     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1516       UDRMap.erase(D);
1517     FunctionUDRMap.erase(CGF.CurFn);
1518   }
1519   auto I = FunctionUDMMap.find(CGF.CurFn);
1520   if (I != FunctionUDMMap.end()) {
1521     for(const auto *D : I->second)
1522       UDMMap.erase(D);
1523     FunctionUDMMap.erase(I);
1524   }
1525   LastprivateConditionalToTypes.erase(CGF.CurFn);
1526   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1527 }
1528 
1529 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1530   return OMPBuilder.IdentPtr;
1531 }
1532 
1533 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1534   if (!Kmpc_MicroTy) {
1535     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1536     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1537                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1538     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1539   }
1540   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1541 }
1542 
1543 llvm::FunctionCallee
1544 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1545                                              bool IsGPUDistribute) {
1546   assert((IVSize == 32 || IVSize == 64) &&
1547          "IV size is not compatible with the omp runtime");
1548   StringRef Name;
1549   if (IsGPUDistribute)
1550     Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1551                                     : "__kmpc_distribute_static_init_4u")
1552                         : (IVSigned ? "__kmpc_distribute_static_init_8"
1553                                     : "__kmpc_distribute_static_init_8u");
1554   else
1555     Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1556                                     : "__kmpc_for_static_init_4u")
1557                         : (IVSigned ? "__kmpc_for_static_init_8"
1558                                     : "__kmpc_for_static_init_8u");
1559 
1560   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1561   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1562   llvm::Type *TypeParams[] = {
1563     getIdentTyPointerTy(),                     // loc
1564     CGM.Int32Ty,                               // tid
1565     CGM.Int32Ty,                               // schedtype
1566     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1567     PtrTy,                                     // p_lower
1568     PtrTy,                                     // p_upper
1569     PtrTy,                                     // p_stride
1570     ITy,                                       // incr
1571     ITy                                        // chunk
1572   };
1573   auto *FnTy =
1574       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1575   return CGM.CreateRuntimeFunction(FnTy, Name);
1576 }
1577 
1578 llvm::FunctionCallee
1579 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1580   assert((IVSize == 32 || IVSize == 64) &&
1581          "IV size is not compatible with the omp runtime");
1582   StringRef Name =
1583       IVSize == 32
1584           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1585           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1586   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1587   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1588                                CGM.Int32Ty,           // tid
1589                                CGM.Int32Ty,           // schedtype
1590                                ITy,                   // lower
1591                                ITy,                   // upper
1592                                ITy,                   // stride
1593                                ITy                    // chunk
1594   };
1595   auto *FnTy =
1596       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1597   return CGM.CreateRuntimeFunction(FnTy, Name);
1598 }
1599 
1600 llvm::FunctionCallee
1601 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1602   assert((IVSize == 32 || IVSize == 64) &&
1603          "IV size is not compatible with the omp runtime");
1604   StringRef Name =
1605       IVSize == 32
1606           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1607           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1608   llvm::Type *TypeParams[] = {
1609       getIdentTyPointerTy(), // loc
1610       CGM.Int32Ty,           // tid
1611   };
1612   auto *FnTy =
1613       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1614   return CGM.CreateRuntimeFunction(FnTy, Name);
1615 }
1616 
1617 llvm::FunctionCallee
1618 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1619   assert((IVSize == 32 || IVSize == 64) &&
1620          "IV size is not compatible with the omp runtime");
1621   StringRef Name =
1622       IVSize == 32
1623           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1624           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1625   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1626   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1627   llvm::Type *TypeParams[] = {
1628     getIdentTyPointerTy(),                     // loc
1629     CGM.Int32Ty,                               // tid
1630     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1631     PtrTy,                                     // p_lower
1632     PtrTy,                                     // p_upper
1633     PtrTy                                      // p_stride
1634   };
1635   auto *FnTy =
1636       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1637   return CGM.CreateRuntimeFunction(FnTy, Name);
1638 }
1639 
1640 /// Obtain information that uniquely identifies a target entry. This
1641 /// consists of the file and device IDs as well as line number associated with
1642 /// the relevant entry source location.
1643 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1644                                      unsigned &DeviceID, unsigned &FileID,
1645                                      unsigned &LineNum) {
1646   SourceManager &SM = C.getSourceManager();
1647 
1648   // The loc should be always valid and have a file ID (the user cannot use
1649   // #pragma directives in macros)
1650 
1651   assert(Loc.isValid() && "Source location is expected to be always valid.");
1652 
1653   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1654   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1655 
1656   llvm::sys::fs::UniqueID ID;
1657   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1658     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1659     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1660     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1661       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1662           << PLoc.getFilename() << EC.message();
1663   }
1664 
1665   DeviceID = ID.getDevice();
1666   FileID = ID.getFile();
1667   LineNum = PLoc.getLine();
1668 }
1669 
1670 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1671   if (CGM.getLangOpts().OpenMPSimd)
1672     return Address::invalid();
1673   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1674       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1675   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
1676               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1677                HasRequiresUnifiedSharedMemory))) {
1678     SmallString<64> PtrName;
1679     {
1680       llvm::raw_svector_ostream OS(PtrName);
1681       OS << CGM.getMangledName(GlobalDecl(VD));
1682       if (!VD->isExternallyVisible()) {
1683         unsigned DeviceID, FileID, Line;
1684         getTargetEntryUniqueInfo(CGM.getContext(),
1685                                  VD->getCanonicalDecl()->getBeginLoc(),
1686                                  DeviceID, FileID, Line);
1687         OS << llvm::format("_%x", FileID);
1688       }
1689       OS << "_decl_tgt_ref_ptr";
1690     }
1691     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
1692     QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
1693     llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(PtrTy);
1694     if (!Ptr) {
1695       Ptr = getOrCreateInternalVariable(LlvmPtrTy, PtrName);
1696 
1697       auto *GV = cast<llvm::GlobalVariable>(Ptr);
1698       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
1699 
1700       if (!CGM.getLangOpts().OpenMPIsDevice)
1701         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
1702       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
1703     }
1704     return Address(Ptr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1705   }
1706   return Address::invalid();
1707 }
1708 
1709 llvm::Constant *
1710 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1711   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1712          !CGM.getContext().getTargetInfo().isTLSSupported());
1713   // Lookup the entry, lazily creating it if necessary.
1714   std::string Suffix = getName({"cache", ""});
1715   return getOrCreateInternalVariable(
1716       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1717 }
1718 
1719 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1720                                                 const VarDecl *VD,
1721                                                 Address VDAddr,
1722                                                 SourceLocation Loc) {
1723   if (CGM.getLangOpts().OpenMPUseTLS &&
1724       CGM.getContext().getTargetInfo().isTLSSupported())
1725     return VDAddr;
1726 
1727   llvm::Type *VarTy = VDAddr.getElementType();
1728   llvm::Value *Args[] = {
1729       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1730       CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
1731       CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1732       getOrCreateThreadPrivateCache(VD)};
1733   return Address(
1734       CGF.EmitRuntimeCall(
1735           OMPBuilder.getOrCreateRuntimeFunction(
1736               CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1737           Args),
1738       CGF.Int8Ty, VDAddr.getAlignment());
1739 }
1740 
1741 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1742     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1743     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1744   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1745   // library.
1746   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1747   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1748                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1749                       OMPLoc);
1750   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1751   // to register constructor/destructor for variable.
1752   llvm::Value *Args[] = {
1753       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1754       Ctor, CopyCtor, Dtor};
1755   CGF.EmitRuntimeCall(
1756       OMPBuilder.getOrCreateRuntimeFunction(
1757           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1758       Args);
1759 }
1760 
/// Emit the constructor/destructor helpers for the threadprivate variable
/// \p VD and register them with the runtime.
///
/// Nothing is emitted when native TLS is requested and supported, when \p VD
/// has no definition, when the variable has already been processed (tracked
/// by mangled name in ThreadPrivateWithDefinition), or when neither a
/// constructor nor a destructor helper is needed.
///
/// \param VD          The threadprivate variable.
/// \param VDAddr      Address of the original variable.
/// \param Loc         Location used for the generated functions and calls.
/// \param PerformInit Whether a constructor helper re-emitting the initializer
///                    should be generated (only honoured for C++).
/// \param CGF         If non-null, the registration calls are emitted into this
///                    function; if null, a standalone init function is created.
/// \returns The standalone init function when \p CGF is null and registration
///          code was emitted; nullptr in every other case.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // The insert succeeds only the first time this mangled name is seen, so each
  // variable is handled once.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    // NOTE(review): Init is dereferenced below without a null check; this
    // presumably relies on PerformInit implying that an initializer exists -
    // confirm against the callers.
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // Single void* parameter: the destination to initialize.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      // View the incoming pointer as an address of the variable's memory type
      // and emit the initializer into it.
      Address Arg(ArgVal, CtorCGF.Int8Ty, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Reload the argument and store it as the function's return value.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // Single void* parameter: the object to destroy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(
          Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Whichever helper was not generated is passed as a typed null pointer.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function was supplied: wrap the registration calls in a
      // dedicated nullary init function and hand it back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration calls directly into the given function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1881 
/// Emit and register the offload entries that construct and destroy the
/// declare target variable \p VD whose global is \p Addr.
///
/// When compiling for the device, real "_ctor"/"_dtor" functions operating on
/// \p Addr are generated; on the host, private i8 placeholder globals with the
/// same names are created instead. In both cases the entry is registered with
/// OffloadEntriesInfoManager.
///
/// \param VD          The declare target variable.
/// \param Addr        The global variable holding \p VD.
/// \param PerformInit Whether a constructor entry should be emitted (only
///                    honoured for C++).
/// \returns false when there are no offload target triples and this is not a
///          device compilation; otherwise the value of the OpenMPIsDevice
///          language option.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  // Nothing to emit for non-declare-target variables, for 'link' variables,
  // and for 'to' variables when unified shared memory is required.
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Process each variable (by mangled name) only once.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    // Note: the literal ends with '_' and the first format string starts with
    // '_', so the resulting name contains a double underscore after
    // "__omp_offloading".
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  // NOTE(review): Init is dereferenced in the device path below without a null
  // check; presumably PerformInit implies an initializer exists - confirm
  // against the callers.
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the global variable Addr of the declare target variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      // Use an address-space-0 view of the global when it lives in a different
      // address space.
      llvm::Constant *AddrInAS0 = Addr;
      if (Addr->getAddressSpace() != 0)
        AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
            Addr, llvm::PointerType::getWithSamePointeeType(
                      cast<llvm::PointerType>(Addr->getType()), 0));
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(AddrInAS0, Addr->getValueType(),
                                       CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: a private constant i8 placeholder stands in for the
      // constructor and also serves as the entry ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the global variable
      // Addr of the declare target variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      // Use an address-space-0 view of the global when it lives in a different
      // address space.
      llvm::Constant *AddrInAS0 = Addr;
      if (Addr->getAddressSpace() != 0)
        AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
            Addr, llvm::PointerType::getWithSamePointeeType(
                      cast<llvm::PointerType>(Addr->getType()), 0));
      DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(),
                                  CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side: a private constant i8 placeholder stands in for the
      // destructor and also serves as the entry ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2008 
2009 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2010                                                           QualType VarType,
2011                                                           StringRef Name) {
2012   std::string Suffix = getName({"artificial", ""});
2013   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2014   llvm::GlobalVariable *GAddr =
2015       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2016   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
2017       CGM.getTarget().isTLSSupported()) {
2018     GAddr->setThreadLocal(/*Val=*/true);
2019     return Address(GAddr, GAddr->getValueType(),
2020                    CGM.getContext().getTypeAlignInChars(VarType));
2021   }
2022   std::string CacheSuffix = getName({"cache", ""});
2023   llvm::Value *Args[] = {
2024       emitUpdateLocation(CGF, SourceLocation()),
2025       getThreadID(CGF, SourceLocation()),
2026       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2027       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2028                                 /*isSigned=*/false),
2029       getOrCreateInternalVariable(
2030           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2031   return Address(
2032       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2033           CGF.EmitRuntimeCall(
2034               OMPBuilder.getOrCreateRuntimeFunction(
2035                   CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
2036               Args),
2037           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2038       VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
2039 }
2040 
2041 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2042                                    const RegionCodeGenTy &ThenGen,
2043                                    const RegionCodeGenTy &ElseGen) {
2044   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2045 
2046   // If the condition constant folds and can be elided, try to avoid emitting
2047   // the condition and the dead arm of the if/else.
2048   bool CondConstant;
2049   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2050     if (CondConstant)
2051       ThenGen(CGF);
2052     else
2053       ElseGen(CGF);
2054     return;
2055   }
2056 
2057   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2058   // emit the conditional branch.
2059   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2060   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2061   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2062   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2063 
2064   // Emit the 'then' code.
2065   CGF.EmitBlock(ThenBlock);
2066   ThenGen(CGF);
2067   CGF.EmitBranch(ContBlock);
2068   // Emit the 'else' code if present.
2069   // There is no need to emit line number for unconditional branch.
2070   (void)ApplyDebugLocation::CreateEmpty(CGF);
2071   CGF.EmitBlock(ElseBlock);
2072   ElseGen(CGF);
2073   // There is no need to emit line number for unconditional branch.
2074   (void)ApplyDebugLocation::CreateEmpty(CGF);
2075   CGF.EmitBranch(ContBlock);
2076   // Emit the continuation block for code after the if.
2077   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2078 }
2079 
2080 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2081                                        llvm::Function *OutlinedFn,
2082                                        ArrayRef<llvm::Value *> CapturedVars,
2083                                        const Expr *IfCond,
2084                                        llvm::Value *NumThreads) {
2085   if (!CGF.HaveInsertPoint())
2086     return;
2087   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2088   auto &M = CGM.getModule();
2089   auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2090                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2091     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2092     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2093     llvm::Value *Args[] = {
2094         RTLoc,
2095         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2096         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2097     llvm::SmallVector<llvm::Value *, 16> RealArgs;
2098     RealArgs.append(std::begin(Args), std::end(Args));
2099     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2100 
2101     llvm::FunctionCallee RTLFn =
2102         OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2103     CGF.EmitRuntimeCall(RTLFn, RealArgs);
2104   };
2105   auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2106                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2107     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2108     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2109     // Build calls:
2110     // __kmpc_serialized_parallel(&Loc, GTid);
2111     llvm::Value *Args[] = {RTLoc, ThreadID};
2112     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2113                             M, OMPRTL___kmpc_serialized_parallel),
2114                         Args);
2115 
2116     // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2117     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2118     Address ZeroAddrBound =
2119         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2120                                          /*Name=*/".bound.zero.addr");
2121     CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
2122     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2123     // ThreadId for serialized parallels is 0.
2124     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2125     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2126     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2127 
2128     // Ensure we do not inline the function. This is trivially true for the ones
2129     // passed to __kmpc_fork_call but the ones called in serialized regions
2130     // could be inlined. This is not a perfect but it is closer to the invariant
2131     // we want, namely, every data environment starts with a new function.
2132     // TODO: We should pass the if condition to the runtime function and do the
2133     //       handling there. Much cleaner code.
2134     OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
2135     OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
2136     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2137 
2138     // __kmpc_end_serialized_parallel(&Loc, GTid);
2139     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2140     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2141                             M, OMPRTL___kmpc_end_serialized_parallel),
2142                         EndArgs);
2143   };
2144   if (IfCond) {
2145     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2146   } else {
2147     RegionCodeGenTy ThenRCG(ThenGen);
2148     ThenRCG(CGF);
2149   }
2150 }
2151 
2152 // If we're inside an (outlined) parallel region, use the region info's
2153 // thread-ID variable (it is passed in a first argument of the outlined function
2154 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2155 // regular serial code region, get thread ID by calling kmp_int32
2156 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2157 // return the address of that temp.
2158 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2159                                              SourceLocation Loc) {
2160   if (auto *OMPRegionInfo =
2161           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2162     if (OMPRegionInfo->getThreadIDVariable())
2163       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2164 
2165   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2166   QualType Int32Ty =
2167       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2168   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2169   CGF.EmitStoreOfScalar(ThreadID,
2170                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2171 
2172   return ThreadIDTemp;
2173 }
2174 
2175 llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable(
2176     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2177   SmallString<256> Buffer;
2178   llvm::raw_svector_ostream Out(Buffer);
2179   Out << Name;
2180   StringRef RuntimeName = Out.str();
2181   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2182   if (Elem.second) {
2183     assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) &&
2184            "OMP internal variable has different type than requested");
2185     return &*Elem.second;
2186   }
2187 
2188   return Elem.second = new llvm::GlobalVariable(
2189              CGM.getModule(), Ty, /*IsConstant*/ false,
2190              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2191              Elem.first(), /*InsertBefore=*/nullptr,
2192              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2193 }
2194 
2195 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2196   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2197   std::string Name = getName({Prefix, "var"});
2198   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2199 }
2200 
2201 namespace {
2202 /// Common pre(post)-action for different OpenMP constructs.
2203 class CommonActionTy final : public PrePostActionTy {
2204   llvm::FunctionCallee EnterCallee;
2205   ArrayRef<llvm::Value *> EnterArgs;
2206   llvm::FunctionCallee ExitCallee;
2207   ArrayRef<llvm::Value *> ExitArgs;
2208   bool Conditional;
2209   llvm::BasicBlock *ContBlock = nullptr;
2210 
2211 public:
2212   CommonActionTy(llvm::FunctionCallee EnterCallee,
2213                  ArrayRef<llvm::Value *> EnterArgs,
2214                  llvm::FunctionCallee ExitCallee,
2215                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2216       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2217         ExitArgs(ExitArgs), Conditional(Conditional) {}
2218   void Enter(CodeGenFunction &CGF) override {
2219     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2220     if (Conditional) {
2221       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2222       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2223       ContBlock = CGF.createBasicBlock("omp_if.end");
2224       // Generate the branch (If-stmt)
2225       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2226       CGF.EmitBlock(ThenBlock);
2227     }
2228   }
2229   void Done(CodeGenFunction &CGF) {
2230     // Emit the rest of blocks/branches
2231     CGF.EmitBranch(ContBlock);
2232     CGF.EmitBlock(ContBlock, true);
2233   }
2234   void Exit(CodeGenFunction &CGF) override {
2235     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2236   }
2237 };
2238 } // anonymous namespace
2239 
2240 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2241                                          StringRef CriticalName,
2242                                          const RegionCodeGenTy &CriticalOpGen,
2243                                          SourceLocation Loc, const Expr *Hint) {
2244   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2245   // CriticalOpGen();
2246   // __kmpc_end_critical(ident_t *, gtid, Lock);
2247   // Prepare arguments and build a call to __kmpc_critical
2248   if (!CGF.HaveInsertPoint())
2249     return;
2250   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2251                          getCriticalRegionLock(CriticalName)};
2252   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2253                                                 std::end(Args));
2254   if (Hint) {
2255     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2256         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2257   }
2258   CommonActionTy Action(
2259       OMPBuilder.getOrCreateRuntimeFunction(
2260           CGM.getModule(),
2261           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2262       EnterArgs,
2263       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2264                                             OMPRTL___kmpc_end_critical),
2265       Args);
2266   CriticalOpGen.setAction(Action);
2267   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2268 }
2269 
2270 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2271                                        const RegionCodeGenTy &MasterOpGen,
2272                                        SourceLocation Loc) {
2273   if (!CGF.HaveInsertPoint())
2274     return;
2275   // if(__kmpc_master(ident_t *, gtid)) {
2276   //   MasterOpGen();
2277   //   __kmpc_end_master(ident_t *, gtid);
2278   // }
2279   // Prepare arguments and build a call to __kmpc_master
2280   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2281   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2282                             CGM.getModule(), OMPRTL___kmpc_master),
2283                         Args,
2284                         OMPBuilder.getOrCreateRuntimeFunction(
2285                             CGM.getModule(), OMPRTL___kmpc_end_master),
2286                         Args,
2287                         /*Conditional=*/true);
2288   MasterOpGen.setAction(Action);
2289   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2290   Action.Done(CGF);
2291 }
2292 
2293 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2294                                        const RegionCodeGenTy &MaskedOpGen,
2295                                        SourceLocation Loc, const Expr *Filter) {
2296   if (!CGF.HaveInsertPoint())
2297     return;
2298   // if(__kmpc_masked(ident_t *, gtid, filter)) {
2299   //   MaskedOpGen();
2300   //   __kmpc_end_masked(iden_t *, gtid);
2301   // }
2302   // Prepare arguments and build a call to __kmpc_masked
2303   llvm::Value *FilterVal = Filter
2304                                ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2305                                : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2306   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2307                          FilterVal};
2308   llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2309                             getThreadID(CGF, Loc)};
2310   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2311                             CGM.getModule(), OMPRTL___kmpc_masked),
2312                         Args,
2313                         OMPBuilder.getOrCreateRuntimeFunction(
2314                             CGM.getModule(), OMPRTL___kmpc_end_masked),
2315                         ArgsEnd,
2316                         /*Conditional=*/true);
2317   MaskedOpGen.setAction(Action);
2318   emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2319   Action.Done(CGF);
2320 }
2321 
2322 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2323                                         SourceLocation Loc) {
2324   if (!CGF.HaveInsertPoint())
2325     return;
2326   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2327     OMPBuilder.createTaskyield(CGF.Builder);
2328   } else {
2329     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2330     llvm::Value *Args[] = {
2331         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2332         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2333     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2334                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2335                         Args);
2336   }
2337 
2338   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2339     Region->emitUntiedSwitch(CGF);
2340 }
2341 
2342 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2343                                           const RegionCodeGenTy &TaskgroupOpGen,
2344                                           SourceLocation Loc) {
2345   if (!CGF.HaveInsertPoint())
2346     return;
2347   // __kmpc_taskgroup(ident_t *, gtid);
2348   // TaskgroupOpGen();
2349   // __kmpc_end_taskgroup(ident_t *, gtid);
2350   // Prepare arguments and build a call to __kmpc_taskgroup
2351   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2352   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2353                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2354                         Args,
2355                         OMPBuilder.getOrCreateRuntimeFunction(
2356                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2357                         Args);
2358   TaskgroupOpGen.setAction(Action);
2359   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2360 }
2361 
2362 /// Given an array of pointers to variables, project the address of a
2363 /// given variable.
2364 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2365                                       unsigned Index, const VarDecl *Var) {
2366   // Pull out the pointer to the variable.
2367   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2368   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2369 
2370   llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2371   return Address(
2372       CGF.Builder.CreateBitCast(
2373           Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
2374       ElemTy, CGF.getContext().getDeclAlign(Var));
2375 }
2376 
2377 static llvm::Value *emitCopyprivateCopyFunction(
2378     CodeGenModule &CGM, llvm::Type *ArgsElemType,
2379     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2380     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2381     SourceLocation Loc) {
2382   ASTContext &C = CGM.getContext();
2383   // void copy_func(void *LHSArg, void *RHSArg);
2384   FunctionArgList Args;
2385   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2386                            ImplicitParamDecl::Other);
2387   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2388                            ImplicitParamDecl::Other);
2389   Args.push_back(&LHSArg);
2390   Args.push_back(&RHSArg);
2391   const auto &CGFI =
2392       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2393   std::string Name =
2394       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2395   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2396                                     llvm::GlobalValue::InternalLinkage, Name,
2397                                     &CGM.getModule());
2398   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2399   Fn->setDoesNotRecurse();
2400   CodeGenFunction CGF(CGM);
2401   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2402   // Dest = (void*[n])(LHSArg);
2403   // Src = (void*[n])(RHSArg);
2404   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2405                   CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2406                   ArgsElemType->getPointerTo()),
2407               ArgsElemType, CGF.getPointerAlign());
2408   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2409                   CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2410                   ArgsElemType->getPointerTo()),
2411               ArgsElemType, CGF.getPointerAlign());
2412   // *(Type0*)Dst[0] = *(Type0*)Src[0];
2413   // *(Type1*)Dst[1] = *(Type1*)Src[1];
2414   // ...
2415   // *(Typen*)Dst[n] = *(Typen*)Src[n];
2416   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2417     const auto *DestVar =
2418         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2419     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2420 
2421     const auto *SrcVar =
2422         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2423     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2424 
2425     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2426     QualType Type = VD->getType();
2427     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2428   }
2429   CGF.FinishFunction();
2430   return Fn;
2431 }
2432 
2433 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2434                                        const RegionCodeGenTy &SingleOpGen,
2435                                        SourceLocation Loc,
2436                                        ArrayRef<const Expr *> CopyprivateVars,
2437                                        ArrayRef<const Expr *> SrcExprs,
2438                                        ArrayRef<const Expr *> DstExprs,
2439                                        ArrayRef<const Expr *> AssignmentOps) {
2440   if (!CGF.HaveInsertPoint())
2441     return;
2442   assert(CopyprivateVars.size() == SrcExprs.size() &&
2443          CopyprivateVars.size() == DstExprs.size() &&
2444          CopyprivateVars.size() == AssignmentOps.size());
2445   ASTContext &C = CGM.getContext();
2446   // int32 did_it = 0;
2447   // if(__kmpc_single(ident_t *, gtid)) {
2448   //   SingleOpGen();
2449   //   __kmpc_end_single(ident_t *, gtid);
2450   //   did_it = 1;
2451   // }
2452   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2453   // <copy_func>, did_it);
2454 
2455   Address DidIt = Address::invalid();
2456   if (!CopyprivateVars.empty()) {
2457     // int32 did_it = 0;
2458     QualType KmpInt32Ty =
2459         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2460     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2461     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2462   }
2463   // Prepare arguments and build a call to __kmpc_single
2464   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2465   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2466                             CGM.getModule(), OMPRTL___kmpc_single),
2467                         Args,
2468                         OMPBuilder.getOrCreateRuntimeFunction(
2469                             CGM.getModule(), OMPRTL___kmpc_end_single),
2470                         Args,
2471                         /*Conditional=*/true);
2472   SingleOpGen.setAction(Action);
2473   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2474   if (DidIt.isValid()) {
2475     // did_it = 1;
2476     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2477   }
2478   Action.Done(CGF);
2479   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2480   // <copy_func>, did_it);
2481   if (DidIt.isValid()) {
2482     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2483     QualType CopyprivateArrayTy = C.getConstantArrayType(
2484         C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2485         /*IndexTypeQuals=*/0);
2486     // Create a list of all private variables for copyprivate.
2487     Address CopyprivateList =
2488         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2489     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2490       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2491       CGF.Builder.CreateStore(
2492           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2493               CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2494               CGF.VoidPtrTy),
2495           Elem);
2496     }
2497     // Build function that copies private values from single region to all other
2498     // threads in the corresponding parallel region.
2499     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2500         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2501         SrcExprs, DstExprs, AssignmentOps, Loc);
2502     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2503     Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2504         CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2505     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2506     llvm::Value *Args[] = {
2507         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2508         getThreadID(CGF, Loc),        // i32 <gtid>
2509         BufSize,                      // size_t <buf_size>
2510         CL.getPointer(),              // void *<copyprivate list>
2511         CpyFn,                        // void (*) (void *, void *) <copy_func>
2512         DidItVal                      // i32 did_it
2513     };
2514     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2515                             CGM.getModule(), OMPRTL___kmpc_copyprivate),
2516                         Args);
2517   }
2518 }
2519 
2520 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2521                                         const RegionCodeGenTy &OrderedOpGen,
2522                                         SourceLocation Loc, bool IsThreads) {
2523   if (!CGF.HaveInsertPoint())
2524     return;
2525   // __kmpc_ordered(ident_t *, gtid);
2526   // OrderedOpGen();
2527   // __kmpc_end_ordered(ident_t *, gtid);
2528   // Prepare arguments and build a call to __kmpc_ordered
2529   if (IsThreads) {
2530     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2531     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2532                               CGM.getModule(), OMPRTL___kmpc_ordered),
2533                           Args,
2534                           OMPBuilder.getOrCreateRuntimeFunction(
2535                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2536                           Args);
2537     OrderedOpGen.setAction(Action);
2538     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2539     return;
2540   }
2541   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2542 }
2543 
2544 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2545   unsigned Flags;
2546   if (Kind == OMPD_for)
2547     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2548   else if (Kind == OMPD_sections)
2549     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2550   else if (Kind == OMPD_single)
2551     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2552   else if (Kind == OMPD_barrier)
2553     Flags = OMP_IDENT_BARRIER_EXPL;
2554   else
2555     Flags = OMP_IDENT_BARRIER_IMPL;
2556   return Flags;
2557 }
2558 
2559 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2560     CodeGenFunction &CGF, const OMPLoopDirective &S,
2561     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2562   // Check if the loop directive is actually a doacross loop directive. In this
2563   // case choose static, 1 schedule.
2564   if (llvm::any_of(
2565           S.getClausesOfKind<OMPOrderedClause>(),
2566           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2567     ScheduleKind = OMPC_SCHEDULE_static;
2568     // Chunk size is 1 in this case.
2569     llvm::APInt ChunkSize(32, 1);
2570     ChunkExpr = IntegerLiteral::Create(
2571         CGF.getContext(), ChunkSize,
2572         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2573         SourceLocation());
2574   }
2575 }
2576 
2577 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2578                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2579                                       bool ForceSimpleCall) {
2580   // Check if we should use the OMPBuilder
2581   auto *OMPRegionInfo =
2582       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2583   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2584     CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2585         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2586     return;
2587   }
2588 
2589   if (!CGF.HaveInsertPoint())
2590     return;
2591   // Build call __kmpc_cancel_barrier(loc, thread_id);
2592   // Build call __kmpc_barrier(loc, thread_id);
2593   unsigned Flags = getDefaultFlagsForBarriers(Kind);
2594   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2595   // thread_id);
2596   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2597                          getThreadID(CGF, Loc)};
2598   if (OMPRegionInfo) {
2599     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2600       llvm::Value *Result = CGF.EmitRuntimeCall(
2601           OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2602                                                 OMPRTL___kmpc_cancel_barrier),
2603           Args);
2604       if (EmitChecks) {
2605         // if (__kmpc_cancel_barrier()) {
2606         //   exit from construct;
2607         // }
2608         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2609         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2610         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2611         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2612         CGF.EmitBlock(ExitBB);
2613         //   exit from construct;
2614         CodeGenFunction::JumpDest CancelDestination =
2615             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2616         CGF.EmitBranchThroughCleanup(CancelDestination);
2617         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2618       }
2619       return;
2620     }
2621   }
2622   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2623                           CGM.getModule(), OMPRTL___kmpc_barrier),
2624                       Args);
2625 }
2626 
2627 /// Map the OpenMP loop schedule to the runtime enumeration.
2628 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2629                                           bool Chunked, bool Ordered) {
2630   switch (ScheduleKind) {
2631   case OMPC_SCHEDULE_static:
2632     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2633                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2634   case OMPC_SCHEDULE_dynamic:
2635     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2636   case OMPC_SCHEDULE_guided:
2637     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2638   case OMPC_SCHEDULE_runtime:
2639     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2640   case OMPC_SCHEDULE_auto:
2641     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2642   case OMPC_SCHEDULE_unknown:
2643     assert(!Chunked && "chunk was specified but schedule kind not known");
2644     return Ordered ? OMP_ord_static : OMP_sch_static;
2645   }
2646   llvm_unreachable("Unexpected runtime schedule");
2647 }
2648 
2649 /// Map the OpenMP distribute schedule to the runtime enumeration.
2650 static OpenMPSchedType
2651 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2652   // only static is allowed for dist_schedule
2653   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2654 }
2655 
2656 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2657                                          bool Chunked) const {
2658   OpenMPSchedType Schedule =
2659       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2660   return Schedule == OMP_sch_static;
2661 }
2662 
2663 bool CGOpenMPRuntime::isStaticNonchunked(
2664     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2665   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2666   return Schedule == OMP_dist_sch_static;
2667 }
2668 
2669 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2670                                       bool Chunked) const {
2671   OpenMPSchedType Schedule =
2672       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2673   return Schedule == OMP_sch_static_chunked;
2674 }
2675 
2676 bool CGOpenMPRuntime::isStaticChunked(
2677     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2678   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2679   return Schedule == OMP_dist_sch_static_chunked;
2680 }
2681 
2682 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2683   OpenMPSchedType Schedule =
2684       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2685   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2686   return Schedule != OMP_sch_static;
2687 }
2688 
2689 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2690                                   OpenMPScheduleClauseModifier M1,
2691                                   OpenMPScheduleClauseModifier M2) {
2692   int Modifier = 0;
2693   switch (M1) {
2694   case OMPC_SCHEDULE_MODIFIER_monotonic:
2695     Modifier = OMP_sch_modifier_monotonic;
2696     break;
2697   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2698     Modifier = OMP_sch_modifier_nonmonotonic;
2699     break;
2700   case OMPC_SCHEDULE_MODIFIER_simd:
2701     if (Schedule == OMP_sch_static_chunked)
2702       Schedule = OMP_sch_static_balanced_chunked;
2703     break;
2704   case OMPC_SCHEDULE_MODIFIER_last:
2705   case OMPC_SCHEDULE_MODIFIER_unknown:
2706     break;
2707   }
2708   switch (M2) {
2709   case OMPC_SCHEDULE_MODIFIER_monotonic:
2710     Modifier = OMP_sch_modifier_monotonic;
2711     break;
2712   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2713     Modifier = OMP_sch_modifier_nonmonotonic;
2714     break;
2715   case OMPC_SCHEDULE_MODIFIER_simd:
2716     if (Schedule == OMP_sch_static_chunked)
2717       Schedule = OMP_sch_static_balanced_chunked;
2718     break;
2719   case OMPC_SCHEDULE_MODIFIER_last:
2720   case OMPC_SCHEDULE_MODIFIER_unknown:
2721     break;
2722   }
2723   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2724   // If the static schedule kind is specified or if the ordered clause is
2725   // specified, and if the nonmonotonic modifier is not specified, the effect is
2726   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2727   // modifier is specified, the effect is as if the nonmonotonic modifier is
2728   // specified.
2729   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2730     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2731           Schedule == OMP_sch_static_balanced_chunked ||
2732           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2733           Schedule == OMP_dist_sch_static_chunked ||
2734           Schedule == OMP_dist_sch_static))
2735       Modifier = OMP_sch_modifier_nonmonotonic;
2736   }
2737   return Schedule | Modifier;
2738 }
2739 
2740 void CGOpenMPRuntime::emitForDispatchInit(
2741     CodeGenFunction &CGF, SourceLocation Loc,
2742     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2743     bool Ordered, const DispatchRTInput &DispatchValues) {
2744   if (!CGF.HaveInsertPoint())
2745     return;
2746   OpenMPSchedType Schedule = getRuntimeSchedule(
2747       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2748   assert(Ordered ||
2749          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2750           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2751           Schedule != OMP_sch_static_balanced_chunked));
2752   // Call __kmpc_dispatch_init(
2753   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2754   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2755   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2756 
2757   // If the Chunk was not specified in the clause - use default value 1.
2758   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2759                                             : CGF.Builder.getIntN(IVSize, 1);
2760   llvm::Value *Args[] = {
2761       emitUpdateLocation(CGF, Loc),
2762       getThreadID(CGF, Loc),
2763       CGF.Builder.getInt32(addMonoNonMonoModifier(
2764           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2765       DispatchValues.LB,                                     // Lower
2766       DispatchValues.UB,                                     // Upper
2767       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2768       Chunk                                                  // Chunk
2769   };
2770   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2771 }
2772 
2773 static void emitForStaticInitCall(
2774     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2775     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2776     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2777     const CGOpenMPRuntime::StaticRTInput &Values) {
2778   if (!CGF.HaveInsertPoint())
2779     return;
2780 
2781   assert(!Values.Ordered);
2782   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2783          Schedule == OMP_sch_static_balanced_chunked ||
2784          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2785          Schedule == OMP_dist_sch_static ||
2786          Schedule == OMP_dist_sch_static_chunked);
2787 
2788   // Call __kmpc_for_static_init(
2789   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2790   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2791   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2792   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2793   llvm::Value *Chunk = Values.Chunk;
2794   if (Chunk == nullptr) {
2795     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2796             Schedule == OMP_dist_sch_static) &&
2797            "expected static non-chunked schedule");
2798     // If the Chunk was not specified in the clause - use default value 1.
2799     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2800   } else {
2801     assert((Schedule == OMP_sch_static_chunked ||
2802             Schedule == OMP_sch_static_balanced_chunked ||
2803             Schedule == OMP_ord_static_chunked ||
2804             Schedule == OMP_dist_sch_static_chunked) &&
2805            "expected static chunked schedule");
2806   }
2807   llvm::Value *Args[] = {
2808       UpdateLocation,
2809       ThreadId,
2810       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2811                                                   M2)), // Schedule type
2812       Values.IL.getPointer(),                           // &isLastIter
2813       Values.LB.getPointer(),                           // &LB
2814       Values.UB.getPointer(),                           // &UB
2815       Values.ST.getPointer(),                           // &Stride
2816       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2817       Chunk                                             // Chunk
2818   };
2819   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2820 }
2821 
2822 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2823                                         SourceLocation Loc,
2824                                         OpenMPDirectiveKind DKind,
2825                                         const OpenMPScheduleTy &ScheduleKind,
2826                                         const StaticRTInput &Values) {
2827   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2828       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2829   assert(isOpenMPWorksharingDirective(DKind) &&
2830          "Expected loop-based or sections-based directive.");
2831   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2832                                              isOpenMPLoopDirective(DKind)
2833                                                  ? OMP_IDENT_WORK_LOOP
2834                                                  : OMP_IDENT_WORK_SECTIONS);
2835   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2836   llvm::FunctionCallee StaticInitFunction =
2837       createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
2838   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2839   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2840                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2841 }
2842 
2843 void CGOpenMPRuntime::emitDistributeStaticInit(
2844     CodeGenFunction &CGF, SourceLocation Loc,
2845     OpenMPDistScheduleClauseKind SchedKind,
2846     const CGOpenMPRuntime::StaticRTInput &Values) {
2847   OpenMPSchedType ScheduleNum =
2848       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2849   llvm::Value *UpdatedLocation =
2850       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2851   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2852   llvm::FunctionCallee StaticInitFunction;
2853   bool isGPUDistribute =
2854       CGM.getLangOpts().OpenMPIsDevice &&
2855       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2856   StaticInitFunction = createForStaticInitFunction(
2857       Values.IVSize, Values.IVSigned, isGPUDistribute);
2858 
2859   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2860                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2861                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2862 }
2863 
2864 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2865                                           SourceLocation Loc,
2866                                           OpenMPDirectiveKind DKind) {
2867   if (!CGF.HaveInsertPoint())
2868     return;
2869   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2870   llvm::Value *Args[] = {
2871       emitUpdateLocation(CGF, Loc,
2872                          isOpenMPDistributeDirective(DKind)
2873                              ? OMP_IDENT_WORK_DISTRIBUTE
2874                              : isOpenMPLoopDirective(DKind)
2875                                    ? OMP_IDENT_WORK_LOOP
2876                                    : OMP_IDENT_WORK_SECTIONS),
2877       getThreadID(CGF, Loc)};
2878   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2879   if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2880       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2881     CGF.EmitRuntimeCall(
2882         OMPBuilder.getOrCreateRuntimeFunction(
2883             CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2884         Args);
2885   else
2886     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2887                             CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2888                         Args);
2889 }
2890 
2891 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2892                                                  SourceLocation Loc,
2893                                                  unsigned IVSize,
2894                                                  bool IVSigned) {
2895   if (!CGF.HaveInsertPoint())
2896     return;
2897   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2898   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2899   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2900 }
2901 
2902 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2903                                           SourceLocation Loc, unsigned IVSize,
2904                                           bool IVSigned, Address IL,
2905                                           Address LB, Address UB,
2906                                           Address ST) {
2907   // Call __kmpc_dispatch_next(
2908   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2909   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2910   //          kmp_int[32|64] *p_stride);
2911   llvm::Value *Args[] = {
2912       emitUpdateLocation(CGF, Loc),
2913       getThreadID(CGF, Loc),
2914       IL.getPointer(), // &isLastIter
2915       LB.getPointer(), // &Lower
2916       UB.getPointer(), // &Upper
2917       ST.getPointer()  // &Stride
2918   };
2919   llvm::Value *Call =
2920       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2921   return CGF.EmitScalarConversion(
2922       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2923       CGF.getContext().BoolTy, Loc);
2924 }
2925 
2926 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2927                                            llvm::Value *NumThreads,
2928                                            SourceLocation Loc) {
2929   if (!CGF.HaveInsertPoint())
2930     return;
2931   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2932   llvm::Value *Args[] = {
2933       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2934       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2935   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2936                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2937                       Args);
2938 }
2939 
2940 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2941                                          ProcBindKind ProcBind,
2942                                          SourceLocation Loc) {
2943   if (!CGF.HaveInsertPoint())
2944     return;
2945   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2946   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2947   llvm::Value *Args[] = {
2948       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2949       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2950   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2951                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2952                       Args);
2953 }
2954 
2955 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2956                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2957   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2958     OMPBuilder.createFlush(CGF.Builder);
2959   } else {
2960     if (!CGF.HaveInsertPoint())
2961       return;
2962     // Build call void __kmpc_flush(ident_t *loc)
2963     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2964                             CGM.getModule(), OMPRTL___kmpc_flush),
2965                         emitUpdateLocation(CGF, Loc));
2966   }
2967 }
2968 
namespace {
/// Indexes of fields for type kmp_task_t.
///
/// The enumerators carry no explicit values, so they are numbered from zero
/// in declaration order; reordering them changes which index each name
/// denotes.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  /// NOTE(review): the enumerator name does not describe the field, unlike
  /// its siblings; rely on this comment rather than the name.
  Data1,
  /// Task priority.
  /// NOTE(review): as with Data1, the name is generic; the comment is the
  /// only description of the field.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2994 
2995 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2996   return OffloadEntriesTargetRegion.empty() &&
2997          OffloadEntriesDeviceGlobalVar.empty();
2998 }
2999 
3000 /// Initialize target region entry.
3001 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3002     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3003                                     StringRef ParentName, unsigned LineNum,
3004                                     unsigned Order) {
3005   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3006                                              "only required for the device "
3007                                              "code generation.");
3008   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3009       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3010                                    OMPTargetRegionEntryTargetRegion);
3011   ++OffloadingEntriesNum;
3012 }
3013 
3014 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3015     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3016                                   StringRef ParentName, unsigned LineNum,
3017                                   llvm::Constant *Addr, llvm::Constant *ID,
3018                                   OMPTargetRegionEntryKind Flags) {
3019   // If we are emitting code for a target, the entry is already initialized,
3020   // only has to be registered.
3021   if (CGM.getLangOpts().OpenMPIsDevice) {
3022     // This could happen if the device compilation is invoked standalone.
3023     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
3024       return;
3025     auto &Entry =
3026         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3027     Entry.setAddress(Addr);
3028     Entry.setID(ID);
3029     Entry.setFlags(Flags);
3030   } else {
3031     if (Flags ==
3032             OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
3033         hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
3034                                  /*IgnoreAddressId*/ true))
3035       return;
3036     assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
3037            "Target region entry already registered!");
3038     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3039     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3040     ++OffloadingEntriesNum;
3041   }
3042 }
3043 
3044 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3045     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3046     bool IgnoreAddressId) const {
3047   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3048   if (PerDevice == OffloadEntriesTargetRegion.end())
3049     return false;
3050   auto PerFile = PerDevice->second.find(FileID);
3051   if (PerFile == PerDevice->second.end())
3052     return false;
3053   auto PerParentName = PerFile->second.find(ParentName);
3054   if (PerParentName == PerFile->second.end())
3055     return false;
3056   auto PerLine = PerParentName->second.find(LineNum);
3057   if (PerLine == PerParentName->second.end())
3058     return false;
3059   // Fail if this entry is already registered.
3060   if (!IgnoreAddressId &&
3061       (PerLine->second.getAddress() || PerLine->second.getID()))
3062     return false;
3063   return true;
3064 }
3065 
3066 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3067     const OffloadTargetRegionEntryInfoActTy &Action) {
3068   // Scan all target region entries and perform the provided action.
3069   for (const auto &D : OffloadEntriesTargetRegion)
3070     for (const auto &F : D.second)
3071       for (const auto &P : F.second)
3072         for (const auto &L : P.second)
3073           Action(D.first, F.first, P.first(), L.first, L.second);
3074 }
3075 
3076 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3077     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3078                                        OMPTargetGlobalVarEntryKind Flags,
3079                                        unsigned Order) {
3080   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3081                                              "only required for the device "
3082                                              "code generation.");
3083   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3084   ++OffloadingEntriesNum;
3085 }
3086 
3087 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3088     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3089                                      CharUnits VarSize,
3090                                      OMPTargetGlobalVarEntryKind Flags,
3091                                      llvm::GlobalValue::LinkageTypes Linkage) {
3092   if (CGM.getLangOpts().OpenMPIsDevice) {
3093     // This could happen if the device compilation is invoked standalone.
3094     if (!hasDeviceGlobalVarEntryInfo(VarName))
3095       return;
3096     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3097     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3098       if (Entry.getVarSize().isZero()) {
3099         Entry.setVarSize(VarSize);
3100         Entry.setLinkage(Linkage);
3101       }
3102       return;
3103     }
3104     Entry.setVarSize(VarSize);
3105     Entry.setLinkage(Linkage);
3106     Entry.setAddress(Addr);
3107   } else {
3108     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3109       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3110       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3111              "Entry not initialized!");
3112       if (Entry.getVarSize().isZero()) {
3113         Entry.setVarSize(VarSize);
3114         Entry.setLinkage(Linkage);
3115       }
3116       return;
3117     }
3118     OffloadEntriesDeviceGlobalVar.try_emplace(
3119         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3120     ++OffloadingEntriesNum;
3121   }
3122 }
3123 
3124 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3125     actOnDeviceGlobalVarEntriesInfo(
3126         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3127   // Scan all target region entries and perform the provided action.
3128   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3129     Action(E.getKey(), E.getValue());
3130 }
3131 
3132 void CGOpenMPRuntime::createOffloadEntry(
3133     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3134     llvm::GlobalValue::LinkageTypes Linkage) {
3135   StringRef Name = Addr->getName();
3136   llvm::Module &M = CGM.getModule();
3137   llvm::LLVMContext &C = M.getContext();
3138 
3139   // Create constant string with the name.
3140   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3141 
3142   std::string StringName = getName({"omp_offloading", "entry_name"});
3143   auto *Str = new llvm::GlobalVariable(
3144       M, StrPtrInit->getType(), /*isConstant=*/true,
3145       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3146   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3147 
3148   llvm::Constant *Data[] = {
3149       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3150       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3151       llvm::ConstantInt::get(CGM.SizeTy, Size),
3152       llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3153       llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3154   std::string EntryName = getName({"omp_offloading", "entry", ""});
3155   llvm::GlobalVariable *Entry = createGlobalStruct(
3156       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3157       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3158 
3159   // The entry has to be created in the section the linker expects it to be.
3160   Entry->setSection("omp_offloading_entries");
3161 }
3162 
3163 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3164   // Emit the offloading entries and metadata so that the device codegen side
3165   // can easily figure out what to emit. The produced metadata looks like
3166   // this:
3167   //
3168   // !omp_offload.info = !{!1, ...}
3169   //
3170   // Right now we only generate metadata for function that contain target
3171   // regions.
3172 
3173   // If we are in simd mode or there are no entries, we don't need to do
3174   // anything.
3175   if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
3176     return;
3177 
3178   llvm::Module &M = CGM.getModule();
3179   llvm::LLVMContext &C = M.getContext();
3180   SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
3181                          SourceLocation, StringRef>,
3182               16>
3183       OrderedEntries(OffloadEntriesInfoManager.size());
3184   llvm::SmallVector<StringRef, 16> ParentFunctions(
3185       OffloadEntriesInfoManager.size());
3186 
3187   // Auxiliary methods to create metadata values and strings.
3188   auto &&GetMDInt = [this](unsigned V) {
3189     return llvm::ConstantAsMetadata::get(
3190         llvm::ConstantInt::get(CGM.Int32Ty, V));
3191   };
3192 
3193   auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3194 
3195   // Create the offloading info metadata node.
3196   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3197 
3198   // Create function that emits metadata for each target region entry;
3199   auto &&TargetRegionMetadataEmitter =
3200       [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
3201        &GetMDString](
3202           unsigned DeviceID, unsigned FileID, StringRef ParentName,
3203           unsigned Line,
3204           const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
3205         // Generate metadata for target regions. Each entry of this metadata
3206         // contains:
3207         // - Entry 0 -> Kind of this type of metadata (0).
3208         // - Entry 1 -> Device ID of the file where the entry was identified.
3209         // - Entry 2 -> File ID of the file where the entry was identified.
3210         // - Entry 3 -> Mangled name of the function where the entry was
3211         // identified.
3212         // - Entry 4 -> Line in the file where the entry was identified.
3213         // - Entry 5 -> Order the entry was created.
3214         // The first element of the metadata node is the kind.
3215         llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
3216                                  GetMDInt(FileID),      GetMDString(ParentName),
3217                                  GetMDInt(Line),        GetMDInt(E.getOrder())};
3218 
3219         SourceLocation Loc;
3220         for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
3221                   E = CGM.getContext().getSourceManager().fileinfo_end();
3222              I != E; ++I) {
3223           if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
3224               I->getFirst()->getUniqueID().getFile() == FileID) {
3225             Loc = CGM.getContext().getSourceManager().translateFileLineCol(
3226                 I->getFirst(), Line, 1);
3227             break;
3228           }
3229         }
3230         // Save this entry in the right position of the ordered entries array.
3231         OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
3232         ParentFunctions[E.getOrder()] = ParentName;
3233 
3234         // Add metadata to the named metadata node.
3235         MD->addOperand(llvm::MDNode::get(C, Ops));
3236       };
3237 
3238   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3239       TargetRegionMetadataEmitter);
3240 
3241   // Create function that emits metadata for each device global variable entry;
3242   auto &&DeviceGlobalVarMetadataEmitter =
3243       [&C, &OrderedEntries, &GetMDInt, &GetMDString,
3244        MD](StringRef MangledName,
3245            const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
3246                &E) {
3247         // Generate metadata for global variables. Each entry of this metadata
3248         // contains:
3249         // - Entry 0 -> Kind of this type of metadata (1).
3250         // - Entry 1 -> Mangled name of the variable.
3251         // - Entry 2 -> Declare target kind.
3252         // - Entry 3 -> Order the entry was created.
3253         // The first element of the metadata node is the kind.
3254         llvm::Metadata *Ops[] = {
3255             GetMDInt(E.getKind()), GetMDString(MangledName),
3256             GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3257 
3258         // Save this entry in the right position of the ordered entries array.
3259         OrderedEntries[E.getOrder()] =
3260             std::make_tuple(&E, SourceLocation(), MangledName);
3261 
3262         // Add metadata to the named metadata node.
3263         MD->addOperand(llvm::MDNode::get(C, Ops));
3264       };
3265 
3266   OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
3267       DeviceGlobalVarMetadataEmitter);
3268 
3269   for (const auto &E : OrderedEntries) {
3270     assert(std::get<0>(E) && "All ordered entries must exist!");
3271     if (const auto *CE =
3272             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3273                 std::get<0>(E))) {
3274       if (!CE->getID() || !CE->getAddress()) {
3275         // Do not blame the entry if the parent funtion is not emitted.
3276         StringRef FnName = ParentFunctions[CE->getOrder()];
3277         if (!CGM.GetGlobalValue(FnName))
3278           continue;
3279         unsigned DiagID = CGM.getDiags().getCustomDiagID(
3280             DiagnosticsEngine::Error,
3281             "Offloading entry for target region in %0 is incorrect: either the "
3282             "address or the ID is invalid.");
3283         CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
3284         continue;
3285       }
3286       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3287                          CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3288     } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
3289                                              OffloadEntryInfoDeviceGlobalVar>(
3290                    std::get<0>(E))) {
3291       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
3292           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3293               CE->getFlags());
3294       switch (Flags) {
3295       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
3296         if (CGM.getLangOpts().OpenMPIsDevice &&
3297             CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
3298           continue;
3299         if (!CE->getAddress()) {
3300           unsigned DiagID = CGM.getDiags().getCustomDiagID(
3301               DiagnosticsEngine::Error, "Offloading entry for declare target "
3302                                         "variable %0 is incorrect: the "
3303                                         "address is invalid.");
3304           CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
3305           continue;
3306         }
3307         // The vaiable has no definition - no need to add the entry.
3308         if (CE->getVarSize().isZero())
3309           continue;
3310         break;
3311       }
3312       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
3313         assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
3314                 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
3315                "Declaret target link address is set.");
3316         if (CGM.getLangOpts().OpenMPIsDevice)
3317           continue;
3318         if (!CE->getAddress()) {
3319           unsigned DiagID = CGM.getDiags().getCustomDiagID(
3320               DiagnosticsEngine::Error,
3321               "Offloading entry for declare target variable is incorrect: the "
3322               "address is invalid.");
3323           CGM.getDiags().Report(DiagID);
3324           continue;
3325         }
3326         break;
3327       }
3328       createOffloadEntry(CE->getAddress(), CE->getAddress(),
3329                          CE->getVarSize().getQuantity(), Flags,
3330                          CE->getLinkage());
3331     } else {
3332       llvm_unreachable("Unsupported entry kind.");
3333     }
3334   }
3335 }
3336 
3337 /// Loads all the offload entries information from the host IR
3338 /// metadata.
3339 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3340   // If we are in target mode, load the metadata from the host IR. This code has
3341   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
3342 
3343   if (!CGM.getLangOpts().OpenMPIsDevice)
3344     return;
3345 
3346   if (CGM.getLangOpts().OMPHostIRFile.empty())
3347     return;
3348 
3349   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3350   if (auto EC = Buf.getError()) {
3351     CGM.getDiags().Report(diag::err_cannot_open_file)
3352         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3353     return;
3354   }
3355 
3356   llvm::LLVMContext C;
3357   auto ME = expectedToErrorOrAndEmitErrors(
3358       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3359 
3360   if (auto EC = ME.getError()) {
3361     unsigned DiagID = CGM.getDiags().getCustomDiagID(
3362         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3363     CGM.getDiags().Report(DiagID)
3364         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3365     return;
3366   }
3367 
3368   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3369   if (!MD)
3370     return;
3371 
3372   for (llvm::MDNode *MN : MD->operands()) {
3373     auto &&GetMDInt = [MN](unsigned Idx) {
3374       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3375       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3376     };
3377 
3378     auto &&GetMDString = [MN](unsigned Idx) {
3379       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3380       return V->getString();
3381     };
3382 
3383     switch (GetMDInt(0)) {
3384     default:
3385       llvm_unreachable("Unexpected metadata!");
3386       break;
3387     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3388         OffloadingEntryInfoTargetRegion:
3389       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3390           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3391           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3392           /*Order=*/GetMDInt(5));
3393       break;
3394     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3395         OffloadingEntryInfoDeviceGlobalVar:
3396       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3397           /*MangledName=*/GetMDString(1),
3398           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3399               /*Flags=*/GetMDInt(2)),
3400           /*Order=*/GetMDInt(3));
3401       break;
3402     }
3403   }
3404 }
3405 
3406 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3407   if (!KmpRoutineEntryPtrTy) {
3408     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3409     ASTContext &C = CGM.getContext();
3410     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3411     FunctionProtoType::ExtProtoInfo EPI;
3412     KmpRoutineEntryPtrQTy = C.getPointerType(
3413         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3414     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3415   }
3416 }
3417 
3418 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3419   // Make sure the type of the entry is already created. This is the type we
3420   // have to create:
3421   // struct __tgt_offload_entry{
3422   //   void      *addr;       // Pointer to the offload entry info.
3423   //                          // (function or global)
3424   //   char      *name;       // Name of the function or global.
3425   //   size_t     size;       // Size of the entry info (0 if it a function).
3426   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3427   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3428   // };
3429   if (TgtOffloadEntryQTy.isNull()) {
3430     ASTContext &C = CGM.getContext();
3431     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3432     RD->startDefinition();
3433     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3434     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3435     addFieldToRecordDecl(C, RD, C.getSizeType());
3436     addFieldToRecordDecl(
3437         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3438     addFieldToRecordDecl(
3439         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3440     RD->completeDefinition();
3441     RD->addAttr(PackedAttr::CreateImplicit(C));
3442     TgtOffloadEntryQTy = C.getRecordType(RD);
3443   }
3444   return TgtOffloadEntryQTy;
3445 }
3446 
3447 namespace {
3448 struct PrivateHelpersTy {
3449   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3450                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3451       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3452         PrivateElemInit(PrivateElemInit) {}
3453   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3454   const Expr *OriginalRef = nullptr;
3455   const VarDecl *Original = nullptr;
3456   const VarDecl *PrivateCopy = nullptr;
3457   const VarDecl *PrivateElemInit = nullptr;
3458   bool isLocalPrivate() const {
3459     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3460   }
3461 };
3462 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3463 } // anonymous namespace
3464 
3465 static bool isAllocatableDecl(const VarDecl *VD) {
3466   const VarDecl *CVD = VD->getCanonicalDecl();
3467   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3468     return false;
3469   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3470   // Use the default allocation.
3471   return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
3472            !AA->getAllocator());
3473 }
3474 
3475 static RecordDecl *
3476 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3477   if (!Privates.empty()) {
3478     ASTContext &C = CGM.getContext();
3479     // Build struct .kmp_privates_t. {
3480     //         /*  private vars  */
3481     //       };
3482     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3483     RD->startDefinition();
3484     for (const auto &Pair : Privates) {
3485       const VarDecl *VD = Pair.second.Original;
3486       QualType Type = VD->getType().getNonReferenceType();
3487       // If the private variable is a local variable with lvalue ref type,
3488       // allocate the pointer instead of the pointee type.
3489       if (Pair.second.isLocalPrivate()) {
3490         if (VD->getType()->isLValueReferenceType())
3491           Type = C.getPointerType(Type);
3492         if (isAllocatableDecl(VD))
3493           Type = C.getPointerType(Type);
3494       }
3495       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3496       if (VD->hasAttrs()) {
3497         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3498              E(VD->getAttrs().end());
3499              I != E; ++I)
3500           FD->addAttr(*I);
3501       }
3502     }
3503     RD->completeDefinition();
3504     return RD;
3505   }
3506   return nullptr;
3507 }
3508 
3509 static RecordDecl *
3510 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3511                          QualType KmpInt32Ty,
3512                          QualType KmpRoutineEntryPointerQTy) {
3513   ASTContext &C = CGM.getContext();
3514   // Build struct kmp_task_t {
3515   //         void *              shareds;
3516   //         kmp_routine_entry_t routine;
3517   //         kmp_int32           part_id;
3518   //         kmp_cmplrdata_t data1;
3519   //         kmp_cmplrdata_t data2;
3520   // For taskloops additional fields:
3521   //         kmp_uint64          lb;
3522   //         kmp_uint64          ub;
3523   //         kmp_int64           st;
3524   //         kmp_int32           liter;
3525   //         void *              reductions;
3526   //       };
3527   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3528   UD->startDefinition();
3529   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3530   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3531   UD->completeDefinition();
3532   QualType KmpCmplrdataTy = C.getRecordType(UD);
3533   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3534   RD->startDefinition();
3535   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3536   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3537   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3538   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3539   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3540   if (isOpenMPTaskLoopDirective(Kind)) {
3541     QualType KmpUInt64Ty =
3542         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3543     QualType KmpInt64Ty =
3544         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3545     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3546     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3547     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3548     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3549     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3550   }
3551   RD->completeDefinition();
3552   return RD;
3553 }
3554 
3555 static RecordDecl *
3556 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3557                                      ArrayRef<PrivateDataTy> Privates) {
3558   ASTContext &C = CGM.getContext();
3559   // Build struct kmp_task_t_with_privates {
3560   //         kmp_task_t task_data;
3561   //         .kmp_privates_t. privates;
3562   //       };
3563   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3564   RD->startDefinition();
3565   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3566   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3567     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3568   RD->completeDefinition();
3569   return RD;
3570 }
3571 
/// Emit a proxy function which accepts a pointer to kmp_task_t_with_privates
/// as the second argument and forwards to the outlined task function.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t_with_privates *tt) {
///   TaskFunction(gtid, &tt->task_data.part_id, &tt->privates,
///   task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->task_data.reductions,
///   Always last:
///   tt->task_data.shareds);
///   return 0;
/// }
/// \endcode
/// A null pointer is passed instead of &tt->privates when the record has no
/// privates field. The shareds pointer is cast to \p SharedsPtrTy before the
/// call.
/// \return the newly created internal proxy function.
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Proxy signature: (kmp_int32 gtid, kmp_task_t_with_privates *restrict tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, &tt->task_data.part_id, &tt->privates,
  // task_privates_map, tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.reductions,
  // Always last:
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase designates *tt; Base designates its first field (the kmp_task_t).
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address, not by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load the shareds pointer and cast it to the expected pointer type.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates record is the optional second field of the wrapper record;
  // pass its address as void*, or null when the field does not exist.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Arguments common to tasks and taskloops; the last one is tt as void*.
  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
                                               CGF.VoidPtrTy, CGF.Int8Ty)
          .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally pass lb, ub, st, liter and reductions by value.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  // The shareds pointer is always the trailing argument.
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // return 0;
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3686 
3687 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3688                                             SourceLocation Loc,
3689                                             QualType KmpInt32Ty,
3690                                             QualType KmpTaskTWithPrivatesPtrQTy,
3691                                             QualType KmpTaskTWithPrivatesQTy) {
3692   ASTContext &C = CGM.getContext();
3693   FunctionArgList Args;
3694   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3695                             ImplicitParamDecl::Other);
3696   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3697                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3698                                 ImplicitParamDecl::Other);
3699   Args.push_back(&GtidArg);
3700   Args.push_back(&TaskTypeArg);
3701   const auto &DestructorFnInfo =
3702       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3703   llvm::FunctionType *DestructorFnTy =
3704       CGM.getTypes().GetFunctionType(DestructorFnInfo);
3705   std::string Name =
3706       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3707   auto *DestructorFn =
3708       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3709                              Name, &CGM.getModule());
3710   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3711                                     DestructorFnInfo);
3712   DestructorFn->setDoesNotRecurse();
3713   CodeGenFunction CGF(CGM);
3714   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3715                     Args, Loc, Loc);
3716 
3717   LValue Base = CGF.EmitLoadOfPointerLValue(
3718       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3719       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3720   const auto *KmpTaskTWithPrivatesQTyRD =
3721       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3722   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3723   Base = CGF.EmitLValueForField(Base, *FI);
3724   for (const auto *Field :
3725        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3726     if (QualType::DestructionKind DtorKind =
3727             Field->getType().isDestructedType()) {
3728       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3729       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3730     }
3731   }
3732   CGF.FinishFunction();
3733   return DestructorFn;
3734 }
3735 
3736 /// Emit a privates mapping function for correct handling of private and
3737 /// firstprivate variables.
3738 /// \code
3739 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3740 /// **noalias priv1,...,  <tyn> **noalias privn) {
3741 ///   *priv1 = &.privates.priv1;
3742 ///   ...;
3743 ///   *privn = &.privates.privn;
3744 /// }
3745 /// \endcode
3746 static llvm::Value *
3747 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3748                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3749                                ArrayRef<PrivateDataTy> Privates) {
3750   ASTContext &C = CGM.getContext();
3751   FunctionArgList Args;
3752   ImplicitParamDecl TaskPrivatesArg(
3753       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3754       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3755       ImplicitParamDecl::Other);
3756   Args.push_back(&TaskPrivatesArg);
3757   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3758   unsigned Counter = 1;
3759   for (const Expr *E : Data.PrivateVars) {
3760     Args.push_back(ImplicitParamDecl::Create(
3761         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3762         C.getPointerType(C.getPointerType(E->getType()))
3763             .withConst()
3764             .withRestrict(),
3765         ImplicitParamDecl::Other));
3766     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3767     PrivateVarsPos[VD] = Counter;
3768     ++Counter;
3769   }
3770   for (const Expr *E : Data.FirstprivateVars) {
3771     Args.push_back(ImplicitParamDecl::Create(
3772         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3773         C.getPointerType(C.getPointerType(E->getType()))
3774             .withConst()
3775             .withRestrict(),
3776         ImplicitParamDecl::Other));
3777     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3778     PrivateVarsPos[VD] = Counter;
3779     ++Counter;
3780   }
3781   for (const Expr *E : Data.LastprivateVars) {
3782     Args.push_back(ImplicitParamDecl::Create(
3783         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3784         C.getPointerType(C.getPointerType(E->getType()))
3785             .withConst()
3786             .withRestrict(),
3787         ImplicitParamDecl::Other));
3788     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3789     PrivateVarsPos[VD] = Counter;
3790     ++Counter;
3791   }
3792   for (const VarDecl *VD : Data.PrivateLocals) {
3793     QualType Ty = VD->getType().getNonReferenceType();
3794     if (VD->getType()->isLValueReferenceType())
3795       Ty = C.getPointerType(Ty);
3796     if (isAllocatableDecl(VD))
3797       Ty = C.getPointerType(Ty);
3798     Args.push_back(ImplicitParamDecl::Create(
3799         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3800         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3801         ImplicitParamDecl::Other));
3802     PrivateVarsPos[VD] = Counter;
3803     ++Counter;
3804   }
3805   const auto &TaskPrivatesMapFnInfo =
3806       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3807   llvm::FunctionType *TaskPrivatesMapTy =
3808       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3809   std::string Name =
3810       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3811   auto *TaskPrivatesMap = llvm::Function::Create(
3812       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3813       &CGM.getModule());
3814   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3815                                     TaskPrivatesMapFnInfo);
3816   if (CGM.getLangOpts().Optimize) {
3817     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3818     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3819     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3820   }
3821   CodeGenFunction CGF(CGM);
3822   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3823                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3824 
3825   // *privi = &.privates.privi;
3826   LValue Base = CGF.EmitLoadOfPointerLValue(
3827       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3828       TaskPrivatesArg.getType()->castAs<PointerType>());
3829   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3830   Counter = 0;
3831   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3832     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3833     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3834     LValue RefLVal =
3835         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3836     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3837         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3838     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3839     ++Counter;
3840   }
3841   CGF.FinishFunction();
3842   return TaskPrivatesMap;
3843 }
3844 
/// Emit initialization for private variables in task-based directives.
///
/// Walks \p Privates in lockstep with the fields of the privates record (the
/// second field of \p KmpTaskTWithPrivatesQTyRD, reached through \p TDBase)
/// and emits the initializer of each private copy into its field.
///
/// \param KmpTaskSharedsPtr Address of the shareds block; only used to form
/// the source base when it is needed (see the condition below).
/// \param ForDup When true, only privates whose initializer is a non-trivial
/// CXXConstructExpr are initialized, and firstprivate sources are read from
/// the shareds block.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // FI first points at the privates field of the task-with-privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  // Pick the captured statement that belongs to the task or taskloop region.
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    // View the shareds block as a SharedsTy object so that captured fields can
    // be addressed through it.
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  // From here on FI iterates over the fields of the privates record itself.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      // Still advance FI so that fields and Privates stay in lockstep.
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // When duplicating, only non-trivial constructor calls are re-run.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // A non-null PrivateElemInit means the initializer reads the original
      // variable through Elem, so the original's address is needed.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          // Not captured: take the address of the local variable directly.
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the original from the shareds block, using the alignment of
          // the original declaration.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress(CGF).withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          // Lambda-captured variable or code inside a block: emit the
          // reference expression as is.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  // Map Elem to the current source element for the
                  // duration of this element's initializer.
                  InitScope.addPrivate(Elem, SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array: map Elem to the original's address and emit the
          // initializer into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // The initializer does not depend on the original variable.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3964 
3965 /// Check if duplication function is required for taskloops.
3966 static bool checkInitIsRequired(CodeGenFunction &CGF,
3967                                 ArrayRef<PrivateDataTy> Privates) {
3968   bool InitRequired = false;
3969   for (const PrivateDataTy &Pair : Privates) {
3970     if (Pair.second.isLocalPrivate())
3971       continue;
3972     const VarDecl *VD = Pair.second.PrivateCopy;
3973     const Expr *Init = VD->getAnyInitializer();
3974     InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3975                                     !CGF.isTrivialInitializer(Init));
3976     if (InitRequired)
3977       break;
3978   }
3979   return InitRequired;
3980 }
3981 
3982 
3983 /// Emit task_dup function (for initialization of
3984 /// private/firstprivate/lastprivate vars and last_iter flag)
3985 /// \code
3986 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3987 /// lastpriv) {
3988 /// // setup lastprivate flag
3989 ///    task_dst->last = lastpriv;
3990 /// // could be constructor calls here...
3991 /// }
3992 /// \endcode
3993 static llvm::Value *
3994 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3995                     const OMPExecutableDirective &D,
3996                     QualType KmpTaskTWithPrivatesPtrQTy,
3997                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3998                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3999                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4000                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4001   ASTContext &C = CGM.getContext();
4002   FunctionArgList Args;
4003   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4004                            KmpTaskTWithPrivatesPtrQTy,
4005                            ImplicitParamDecl::Other);
4006   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4007                            KmpTaskTWithPrivatesPtrQTy,
4008                            ImplicitParamDecl::Other);
4009   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4010                                 ImplicitParamDecl::Other);
4011   Args.push_back(&DstArg);
4012   Args.push_back(&SrcArg);
4013   Args.push_back(&LastprivArg);
4014   const auto &TaskDupFnInfo =
4015       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4016   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4017   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
4018   auto *TaskDup = llvm::Function::Create(
4019       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4020   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4021   TaskDup->setDoesNotRecurse();
4022   CodeGenFunction CGF(CGM);
4023   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4024                     Loc);
4025 
4026   LValue TDBase = CGF.EmitLoadOfPointerLValue(
4027       CGF.GetAddrOfLocalVar(&DstArg),
4028       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4029   // task_dst->liter = lastpriv;
4030   if (WithLastIter) {
4031     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4032     LValue Base = CGF.EmitLValueForField(
4033         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4034     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4035     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4036         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4037     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4038   }
4039 
4040   // Emit initial values for private copies (if any).
4041   assert(!Privates.empty());
4042   Address KmpTaskSharedsPtr = Address::invalid();
4043   if (!Data.FirstprivateVars.empty()) {
4044     LValue TDBase = CGF.EmitLoadOfPointerLValue(
4045         CGF.GetAddrOfLocalVar(&SrcArg),
4046         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4047     LValue Base = CGF.EmitLValueForField(
4048         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4049     KmpTaskSharedsPtr = Address(
4050         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4051                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
4052                                                   KmpTaskTShareds)),
4053                              Loc),
4054         CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
4055   }
4056   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4057                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4058   CGF.FinishFunction();
4059   return TaskDup;
4060 }
4061 
4062 /// Checks if destructor function is required to be generated.
4063 /// \return true if cleanups are required, false otherwise.
4064 static bool
4065 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4066                          ArrayRef<PrivateDataTy> Privates) {
4067   for (const PrivateDataTy &P : Privates) {
4068     if (P.second.isLocalPrivate())
4069       continue;
4070     QualType Ty = P.second.Original->getType().getNonReferenceType();
4071     if (Ty.isDestructedType())
4072       return true;
4073   }
4074   return false;
4075 }
4076 
4077 namespace {
4078 /// Loop generator for OpenMP iterator expression.
4079 class OMPIteratorGeneratorScope final
4080     : public CodeGenFunction::OMPPrivateScope {
4081   CodeGenFunction &CGF;
4082   const OMPIteratorExpr *E = nullptr;
4083   SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
4084   SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
4085   OMPIteratorGeneratorScope() = delete;
4086   OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
4087 
4088 public:
4089   OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
4090       : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
4091     if (!E)
4092       return;
4093     SmallVector<llvm::Value *, 4> Uppers;
4094     for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
4095       Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
4096       const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
4097       addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
4098       const OMPIteratorHelperData &HelperData = E->getHelper(I);
4099       addPrivate(
4100           HelperData.CounterVD,
4101           CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
4102     }
4103     Privatize();
4104 
4105     for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
4106       const OMPIteratorHelperData &HelperData = E->getHelper(I);
4107       LValue CLVal =
4108           CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
4109                              HelperData.CounterVD->getType());
4110       // Counter = 0;
4111       CGF.EmitStoreOfScalar(
4112           llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
4113           CLVal);
4114       CodeGenFunction::JumpDest &ContDest =
4115           ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
4116       CodeGenFunction::JumpDest &ExitDest =
4117           ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
4118       // N = <number-of_iterations>;
4119       llvm::Value *N = Uppers[I];
4120       // cont:
4121       // if (Counter < N) goto body; else goto exit;
4122       CGF.EmitBlock(ContDest.getBlock());
4123       auto *CVal =
4124           CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
4125       llvm::Value *Cmp =
4126           HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
4127               ? CGF.Builder.CreateICmpSLT(CVal, N)
4128               : CGF.Builder.CreateICmpULT(CVal, N);
4129       llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
4130       CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
4131       // body:
4132       CGF.EmitBlock(BodyBB);
4133       // Iteri = Begini + Counter * Stepi;
4134       CGF.EmitIgnoredExpr(HelperData.Update);
4135     }
4136   }
4137   ~OMPIteratorGeneratorScope() {
4138     if (!E)
4139       return;
4140     for (unsigned I = E->numOfIterators(); I > 0; --I) {
4141       // Counter = Counter + 1;
4142       const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
4143       CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
4144       // goto cont;
4145       CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
4146       // exit:
4147       CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
4148     }
4149   }
4150 };
4151 } // namespace
4152 
4153 static std::pair<llvm::Value *, llvm::Value *>
4154 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4155   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4156   llvm::Value *Addr;
4157   if (OASE) {
4158     const Expr *Base = OASE->getBase();
4159     Addr = CGF.EmitScalarExpr(Base);
4160   } else {
4161     Addr = CGF.EmitLValue(E).getPointer(CGF);
4162   }
4163   llvm::Value *SizeVal;
4164   QualType Ty = E->getType();
4165   if (OASE) {
4166     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4167     for (const Expr *SE : OASE->getDimensions()) {
4168       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4169       Sz = CGF.EmitScalarConversion(
4170           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4171       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4172     }
4173   } else if (const auto *ASE =
4174                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4175     LValue UpAddrLVal =
4176         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4177     Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
4178     llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
4179         UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
4180     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4181     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4182     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4183   } else {
4184     SizeVal = CGF.getTypeSize(Ty);
4185   }
4186   return std::make_pair(Addr, SizeVal);
4187 }
4188 
4189 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4190 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4191   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4192   if (KmpTaskAffinityInfoTy.isNull()) {
4193     RecordDecl *KmpAffinityInfoRD =
4194         C.buildImplicitRecord("kmp_task_affinity_info_t");
4195     KmpAffinityInfoRD->startDefinition();
4196     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4197     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4198     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4199     KmpAffinityInfoRD->completeDefinition();
4200     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4201   }
4202 }
4203 
/// Emits the code that allocates and initializes a task descriptor for the
/// directive \p D.
///
/// In order, the function:
///  - gathers the private, firstprivate, lastprivate and local private
///    variables from \p Data and sorts them by decreasing alignment;
///  - builds (or reuses) the kmp_task_t record and the per-task record that
///    holds kmp_task_t followed by the privates;
///  - emits the privates mapping function and the proxy task entry function;
///  - computes the task flags and emits the call to __kmpc_omp_task_alloc
///    (or __kmpc_omp_target_task_alloc when \p D has a nowait clause);
///  - handles the detach and affinity clauses;
///  - copies the shareds, initializes the private copies, emits the task
///    duplication function for taskloops when needed, and stores the
///    destructors function and the priority into the descriptor.
///
/// \param CGF Function in which the code is emitted.
/// \param Loc Location used for the runtime calls and the helper functions.
/// \param D Directive the task is created for.
/// \param TaskFunction Outlined task function; the type of its fourth
/// parameter is used as the type of the privates mapping function pointer.
/// \param SharedsTy Record type of the shared variables.
/// \param Shareds Address of the record with the shared variables.
/// \param Data Additional data for the task generation (privates, flags,
/// priority).
/// \return A TaskResultTy holding the allocated task, the task entry
/// function, the task pointer cast to the per-task record type, the lvalue of
/// the kmp_task_t part, the kmp_task_t record declaration and, if emitted,
/// the task duplication function.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally carry the variable used for element-wise
  // initialization.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Local privates: allocatable declarations are recorded with pointer
  // alignment, all others with the alignment of the declaration.
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Largest alignment first; the stable sort keeps the relative order of
  // entries with equal alignment.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // The record type is cached separately for taskloop directives and for all
  // other directives.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit the task privates mapping function (if there are any privates);
  // otherwise a null pointer of the expected type is passed to the proxy
  // function.
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    // The second field of the per-task record is the one handed to the
    // mapping function emitter.
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The final flag is either selected at run time (when Data.Final carries a
  // value) or folded into a constant from the integer part of Data.Final.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // With a nowait clause the target variant of the allocation routine is
  // called; it takes the device ID as an additional trailing argument.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    // Convert the returned pointer to the type of the event handler variable
    // before storing it.
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // NumOfElements accumulates the run-time count contributed by clauses
    // with an iterator modifier (product of the iterator upper bounds);
    // NumAffinities counts the items of clauses without a modifier.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // The element count is only known at run time: emit a variable-length
      // array whose size expression is bound to the computed value.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // All clauses are without iterators: a constant-size temporary is
      // enough.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    // NOTE(review): only the BaseAddr and Len fields of each element are
    // stored in this function; the Flags field is never written - confirm
    // this is intended.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Elements produced inside iterator loops are indexed through a run-time
    // counter that starts right after the statically filled elements.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      // The scope object emits the iterator loops around the stores below.
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        // Advance the run-time position counter.
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  // View the allocated task as the per-task record; its first field is the
  // kmp_task_t part.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    // Load the shareds pointer stored in the task descriptor and copy the
    // whole shareds record to it.
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            CGF.EmitLValueForField(
                TDBase,
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // A task duplication function is emitted only for taskloop directives
    // that have lastprivates or privates requiring initialization.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4591 
namespace {
/// Dependence kinds in the numeric form stored into the flags field of a
/// kmp_depend_info record.
enum RTLDependenceKindTy {
  DepIn = 0x1,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4,
  DepInOutSet = 0x8
};
/// Positions of the fields inside the kmp_depend_info record, in declaration
/// order.
enum RTLDependInfoFieldsTy {
  BaseAddr = 0,
  Len = 1,
  Flags = 2
};
} // namespace
4603 
4604 /// Translates internal dependency kind into the runtime kind.
4605 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4606   RTLDependenceKindTy DepKind;
4607   switch (K) {
4608   case OMPC_DEPEND_in:
4609     DepKind = DepIn;
4610     break;
4611   // Out and InOut dependencies must use the same code.
4612   case OMPC_DEPEND_out:
4613   case OMPC_DEPEND_inout:
4614     DepKind = DepInOut;
4615     break;
4616   case OMPC_DEPEND_mutexinoutset:
4617     DepKind = DepMutexInOutSet;
4618     break;
4619   case OMPC_DEPEND_inoutset:
4620     DepKind = DepInOutSet;
4621     break;
4622   case OMPC_DEPEND_source:
4623   case OMPC_DEPEND_sink:
4624   case OMPC_DEPEND_depobj:
4625   case OMPC_DEPEND_unknown:
4626     llvm_unreachable("Unknown task dependence type");
4627   }
4628   return DepKind;
4629 }
4630 
4631 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4632 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4633                            QualType &FlagsTy) {
4634   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4635   if (KmpDependInfoTy.isNull()) {
4636     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4637     KmpDependInfoRD->startDefinition();
4638     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4639     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4640     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4641     KmpDependInfoRD->completeDefinition();
4642     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4643   }
4644 }
4645 
4646 std::pair<llvm::Value *, LValue>
4647 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4648                                    SourceLocation Loc) {
4649   ASTContext &C = CGM.getContext();
4650   QualType FlagsTy;
4651   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4652   RecordDecl *KmpDependInfoRD =
4653       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4654   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4655   LValue Base = CGF.EmitLoadOfPointerLValue(
4656       CGF.Builder.CreateElementBitCast(
4657           DepobjLVal.getAddress(CGF),
4658           CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4659       KmpDependInfoPtrTy->castAs<PointerType>());
4660   Address DepObjAddr = CGF.Builder.CreateGEP(
4661       Base.getAddress(CGF),
4662       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4663   LValue NumDepsBase = CGF.MakeAddrLValue(
4664       DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4665   // NumDeps = deps[i].base_addr;
4666   LValue BaseAddrLVal = CGF.EmitLValueForField(
4667       NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4668   llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4669   return std::make_pair(NumDeps, Base);
4670 }
4671 
4672 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4673                            llvm::PointerUnion<unsigned *, LValue *> Pos,
4674                            const OMPTaskDataTy::DependData &Data,
4675                            Address DependenciesArray) {
4676   CodeGenModule &CGM = CGF.CGM;
4677   ASTContext &C = CGM.getContext();
4678   QualType FlagsTy;
4679   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4680   RecordDecl *KmpDependInfoRD =
4681       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4682   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4683 
4684   OMPIteratorGeneratorScope IteratorScope(
4685       CGF, cast_or_null<OMPIteratorExpr>(
4686                Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4687                                  : nullptr));
4688   for (const Expr *E : Data.DepExprs) {
4689     llvm::Value *Addr;
4690     llvm::Value *Size;
4691     std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4692     LValue Base;
4693     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4694       Base = CGF.MakeAddrLValue(
4695           CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4696     } else {
4697       LValue &PosLVal = *Pos.get<LValue *>();
4698       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4699       Base = CGF.MakeAddrLValue(
4700           CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
4701     }
4702     // deps[i].base_addr = &<Dependencies[i].second>;
4703     LValue BaseAddrLVal = CGF.EmitLValueForField(
4704         Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4705     CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4706                           BaseAddrLVal);
4707     // deps[i].len = sizeof(<Dependencies[i].second>);
4708     LValue LenLVal = CGF.EmitLValueForField(
4709         Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4710     CGF.EmitStoreOfScalar(Size, LenLVal);
4711     // deps[i].flags = <Dependencies[i].first>;
4712     RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4713     LValue FlagsLVal = CGF.EmitLValueForField(
4714         Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4715     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4716                           FlagsLVal);
4717     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4718       ++(*P);
4719     } else {
4720       LValue &PosLVal = *Pos.get<LValue *>();
4721       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4722       Idx = CGF.Builder.CreateNUWAdd(Idx,
4723                                      llvm::ConstantInt::get(Idx->getType(), 1));
4724       CGF.EmitStoreOfScalar(Idx, PosLVal);
4725     }
4726   }
4727 }
4728 
4729 SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
4730     CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4731     const OMPTaskDataTy::DependData &Data) {
4732   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4733          "Expected depobj dependecy kind.");
4734   SmallVector<llvm::Value *, 4> Sizes;
4735   SmallVector<LValue, 4> SizeLVals;
4736   ASTContext &C = CGF.getContext();
4737   {
4738     OMPIteratorGeneratorScope IteratorScope(
4739         CGF, cast_or_null<OMPIteratorExpr>(
4740                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4741                                    : nullptr));
4742     for (const Expr *E : Data.DepExprs) {
4743       llvm::Value *NumDeps;
4744       LValue Base;
4745       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4746       std::tie(NumDeps, Base) =
4747           getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4748       LValue NumLVal = CGF.MakeAddrLValue(
4749           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4750           C.getUIntPtrType());
4751       CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4752                               NumLVal.getAddress(CGF));
4753       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4754       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4755       CGF.EmitStoreOfScalar(Add, NumLVal);
4756       SizeLVals.push_back(NumLVal);
4757     }
4758   }
4759   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4760     llvm::Value *Size =
4761         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4762     Sizes.push_back(Size);
4763   }
4764   return Sizes;
4765 }
4766 
4767 void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
4768                                          QualType &KmpDependInfoTy,
4769                                          LValue PosLVal,
4770                                          const OMPTaskDataTy::DependData &Data,
4771                                          Address DependenciesArray) {
4772   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4773          "Expected depobj dependecy kind.");
4774   llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4775   {
4776     OMPIteratorGeneratorScope IteratorScope(
4777         CGF, cast_or_null<OMPIteratorExpr>(
4778                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4779                                    : nullptr));
4780     for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4781       const Expr *E = Data.DepExprs[I];
4782       llvm::Value *NumDeps;
4783       LValue Base;
4784       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4785       std::tie(NumDeps, Base) =
4786           getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4787 
4788       // memcopy dependency data.
4789       llvm::Value *Size = CGF.Builder.CreateNUWMul(
4790           ElSize,
4791           CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4792       llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4793       Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
4794       CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4795 
4796       // Increase pos.
4797       // pos += size;
4798       llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4799       CGF.EmitStoreOfScalar(Add, PosLVal);
4800     }
4801   }
4802 }
4803 
4804 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4805     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4806     SourceLocation Loc) {
4807   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4808         return D.DepExprs.empty();
4809       }))
4810     return std::make_pair(nullptr, Address::invalid());
4811   // Process list of dependencies.
4812   ASTContext &C = CGM.getContext();
4813   Address DependenciesArray = Address::invalid();
4814   llvm::Value *NumOfElements = nullptr;
4815   unsigned NumDependencies = std::accumulate(
4816       Dependencies.begin(), Dependencies.end(), 0,
4817       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4818         return D.DepKind == OMPC_DEPEND_depobj
4819                    ? V
4820                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4821       });
4822   QualType FlagsTy;
4823   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4824   bool HasDepobjDeps = false;
4825   bool HasRegularWithIterators = false;
4826   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4827   llvm::Value *NumOfRegularWithIterators =
4828       llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4829   // Calculate number of depobj dependecies and regular deps with the iterators.
4830   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4831     if (D.DepKind == OMPC_DEPEND_depobj) {
4832       SmallVector<llvm::Value *, 4> Sizes =
4833           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4834       for (llvm::Value *Size : Sizes) {
4835         NumOfDepobjElements =
4836             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4837       }
4838       HasDepobjDeps = true;
4839       continue;
4840     }
4841     // Include number of iterations, if any.
4842 
4843     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4844       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4845         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4846         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4847         llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4848             Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4849         NumOfRegularWithIterators =
4850             CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4851       }
4852       HasRegularWithIterators = true;
4853       continue;
4854     }
4855   }
4856 
4857   QualType KmpDependInfoArrayTy;
4858   if (HasDepobjDeps || HasRegularWithIterators) {
4859     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4860                                            /*isSigned=*/false);
4861     if (HasDepobjDeps) {
4862       NumOfElements =
4863           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4864     }
4865     if (HasRegularWithIterators) {
4866       NumOfElements =
4867           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4868     }
4869     auto *OVE = new (C) OpaqueValueExpr(
4870         Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4871         VK_PRValue);
4872     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4873                                                   RValue::get(NumOfElements));
4874     KmpDependInfoArrayTy =
4875         C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
4876                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4877     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4878     // Properly emit variable-sized array.
4879     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4880                                          ImplicitParamDecl::Other);
4881     CGF.EmitVarDecl(*PD);
4882     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4883     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4884                                               /*isSigned=*/false);
4885   } else {
4886     KmpDependInfoArrayTy = C.getConstantArrayType(
4887         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4888         ArrayType::Normal, /*IndexTypeQuals=*/0);
4889     DependenciesArray =
4890         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4891     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4892     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4893                                            /*isSigned=*/false);
4894   }
4895   unsigned Pos = 0;
4896   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4897     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4898         Dependencies[I].IteratorExpr)
4899       continue;
4900     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4901                    DependenciesArray);
4902   }
4903   // Copy regular dependecies with iterators.
4904   LValue PosLVal = CGF.MakeAddrLValue(
4905       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4906   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4907   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4908     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4909         !Dependencies[I].IteratorExpr)
4910       continue;
4911     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4912                    DependenciesArray);
4913   }
4914   // Copy final depobj arrays without iterators.
4915   if (HasDepobjDeps) {
4916     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4917       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4918         continue;
4919       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4920                          DependenciesArray);
4921     }
4922   }
4923   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4924       DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4925   return std::make_pair(NumOfElements, DependenciesArray);
4926 }
4927 
4928 Address CGOpenMPRuntime::emitDepobjDependClause(
4929     CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4930     SourceLocation Loc) {
4931   if (Dependencies.DepExprs.empty())
4932     return Address::invalid();
4933   // Process list of dependencies.
4934   ASTContext &C = CGM.getContext();
4935   Address DependenciesArray = Address::invalid();
4936   unsigned NumDependencies = Dependencies.DepExprs.size();
4937   QualType FlagsTy;
4938   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4939   RecordDecl *KmpDependInfoRD =
4940       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4941 
4942   llvm::Value *Size;
4943   // Define type kmp_depend_info[<Dependencies.size()>];
4944   // For depobj reserve one extra element to store the number of elements.
4945   // It is required to handle depobj(x) update(in) construct.
4946   // kmp_depend_info[<Dependencies.size()>] deps;
4947   llvm::Value *NumDepsVal;
4948   CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4949   if (const auto *IE =
4950           cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4951     NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4952     for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4953       llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4954       Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4955       NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4956     }
4957     Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4958                                     NumDepsVal);
4959     CharUnits SizeInBytes =
4960         C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4961     llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4962     Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4963     NumDepsVal =
4964         CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4965   } else {
4966     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4967         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4968         nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
4969     CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4970     Size = CGM.getSize(Sz.alignTo(Align));
4971     NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4972   }
4973   // Need to allocate on the dynamic memory.
4974   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4975   // Use default allocator.
4976   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4977   llvm::Value *Args[] = {ThreadID, Size, Allocator};
4978 
4979   llvm::Value *Addr =
4980       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4981                               CGM.getModule(), OMPRTL___kmpc_alloc),
4982                           Args, ".dep.arr.addr");
4983   llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4984   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4985       Addr, KmpDependInfoLlvmTy->getPointerTo());
4986   DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4987   // Write number of elements in the first element of array for depobj.
4988   LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4989   // deps[i].base_addr = NumDependencies;
4990   LValue BaseAddrLVal = CGF.EmitLValueForField(
4991       Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4992   CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4993   llvm::PointerUnion<unsigned *, LValue *> Pos;
4994   unsigned Idx = 1;
4995   LValue PosLVal;
4996   if (Dependencies.IteratorExpr) {
4997     PosLVal = CGF.MakeAddrLValue(
4998         CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4999         C.getSizeType());
5000     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
5001                           /*IsInit=*/true);
5002     Pos = &PosLVal;
5003   } else {
5004     Pos = &Idx;
5005   }
5006   emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
5007   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5008       CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
5009       CGF.Int8Ty);
5010   return DependenciesArray;
5011 }
5012 
5013 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5014                                         SourceLocation Loc) {
5015   ASTContext &C = CGM.getContext();
5016   QualType FlagsTy;
5017   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5018   LValue Base = CGF.EmitLoadOfPointerLValue(
5019       DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
5020   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5021   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5022       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
5023       CGF.ConvertTypeForMem(KmpDependInfoTy));
5024   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5025       Addr.getElementType(), Addr.getPointer(),
5026       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5027   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5028                                                                CGF.VoidPtrTy);
5029   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5030   // Use default allocator.
5031   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5032   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5033 
5034   // _kmpc_free(gtid, addr, nullptr);
5035   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5036                                 CGM.getModule(), OMPRTL___kmpc_free),
5037                             Args);
5038 }
5039 
// Emit a loop that overwrites the flags field of every dependence record of a
// depobj object with the runtime encoding of NewDepKind.
//
// The record count and the first record are obtained from getDepobjElements.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // End points one past the last record: Begin + NumDeps.
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // The loop is emitted in do-while form: the body block is entered
  // unconditionally and the exit test runs after each record is updated.
  // NOTE(review): this relies on NumDeps being at least 1 - confirm that a
  // depobj array can never be empty.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // The PHI holds the record being updated in the current iteration: the first
  // record when coming from the entry block, the next record on the back edge.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Begin.withPointer(ElementPHI);
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  // Despite its name, IsEmpty is the loop exit test: it is true once the next
  // record pointer has reached End.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5086 
// Emit the runtime calls that launch an OpenMP task.
//
// The task is first set up with emitTaskInit and its depend clauses are
// lowered with emitDependClause. Two code paths are then prepared:
//   - "then": enqueue the task with __kmpc_omp_task_with_deps (dependences
//     present) or __kmpc_omp_task (no dependences);
//   - "else": wait for the dependences with __kmpc_omp_wait_deps (if any) and
//     call the task entry directly, bracketed by __kmpc_omp_task_begin_if0 and
//     __kmpc_omp_task_complete_if0.
// With an if clause (IfCond non-null) both paths are handed to emitIfClause;
// without one only the "then" path is emitted. Nothing is emitted when CGF has
// no insertion point.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  // Filled only when dependences exist; the no-alias list is always passed as
  // empty (count 0, null pointer).
  // NOTE(review): emitDependClause returns an invalid Address when every clause
  // has an empty expression list - confirm callers never pass a non-empty
  // Dependences list of that shape, since getPointer() is called on it here.
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // "Then" path: hand the task over to the runtime. The argument arrays are
  // captured by reference, so the lambda must only run while they are alive
  // (it is invoked before this function returns).
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    // For an untied task, reset the part_id field to 0 before enqueueing.
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  // Arguments for __kmpc_omp_wait_deps: same dependence list as above but
  // without the task pointer.
  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // "Else" path: run the task body inline on the encountering thread.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  // With an if clause both paths are emitted under the condition; without one
  // the task is always enqueued.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5204 
5205 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5206                                        const OMPLoopDirective &D,
5207                                        llvm::Function *TaskFunction,
5208                                        QualType SharedsTy, Address Shareds,
5209                                        const Expr *IfCond,
5210                                        const OMPTaskDataTy &Data) {
5211   if (!CGF.HaveInsertPoint())
5212     return;
5213   TaskResultTy Result =
5214       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5215   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5216   // libcall.
5217   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5218   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5219   // sched, kmp_uint64 grainsize, void *task_dup);
5220   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5221   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5222   llvm::Value *IfVal;
5223   if (IfCond) {
5224     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5225                                       /*isSigned=*/true);
5226   } else {
5227     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5228   }
5229 
5230   LValue LBLVal = CGF.EmitLValueForField(
5231       Result.TDBase,
5232       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5233   const auto *LBVar =
5234       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5235   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5236                        LBLVal.getQuals(),
5237                        /*IsInitializer=*/true);
5238   LValue UBLVal = CGF.EmitLValueForField(
5239       Result.TDBase,
5240       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5241   const auto *UBVar =
5242       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5243   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5244                        UBLVal.getQuals(),
5245                        /*IsInitializer=*/true);
5246   LValue StLVal = CGF.EmitLValueForField(
5247       Result.TDBase,
5248       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5249   const auto *StVar =
5250       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5251   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5252                        StLVal.getQuals(),
5253                        /*IsInitializer=*/true);
5254   // Store reductions address.
5255   LValue RedLVal = CGF.EmitLValueForField(
5256       Result.TDBase,
5257       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5258   if (Data.Reductions) {
5259     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5260   } else {
5261     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5262                                CGF.getContext().VoidPtrTy);
5263   }
5264   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5265   llvm::Value *TaskArgs[] = {
5266       UpLoc,
5267       ThreadID,
5268       Result.NewTask,
5269       IfVal,
5270       LBLVal.getPointer(CGF),
5271       UBLVal.getPointer(CGF),
5272       CGF.EmitLoadOfScalar(StLVal, Loc),
5273       llvm::ConstantInt::getSigned(
5274           CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5275       llvm::ConstantInt::getSigned(
5276           CGF.IntTy, Data.Schedule.getPointer()
5277                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5278                          : NoSchedule),
5279       Data.Schedule.getPointer()
5280           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5281                                       /*isSigned=*/false)
5282           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5283       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5284                              Result.TaskDupFn, CGF.VoidPtrTy)
5285                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5286   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5287                           CGM.getModule(), OMPRTL___kmpc_taskloop),
5288                       TaskArgs);
5289 }
5290 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
///
/// The emitted code walks both arrays in lock step. In each iteration
/// \p LHSVar and \p RHSVar are privatized to the addresses of the current
/// elements and \p RedOpGen is invoked. The loop is skipped entirely when the
/// array has no elements.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (its address is used as the start of the source array).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr Forwarded unchanged to \p RedOpGen.
/// \param EExpr Forwarded unchanged to \p RedOpGen.
/// \param UpExpr Forwarded unchanged to \p RedOpGen.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element reduction.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type; the element count is computed from
  // the LHS array only and is used to bound the walk over both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // LHSEnd points one past the last destination element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The loop is an emptiness guard followed by a body that tests for the end
  // after each element.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs for the current source and destination elements: the array start when
  // coming from the entry block, the next element on the back edge.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent(
      RHSElementPHI, RHSAddr.getElementType(),
      RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent(
      LHSElementPHI, LHSAddr.getElementType(),
      LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit the reduction operation with LHSVar/RHSVar remapped to the current
  // elements for the duration of the private scope.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, LHSElementCurrent);
  Scope.addPrivate(RHSVar, RHSElementCurrent);
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The back edge originates from whatever block the generator left the
  // builder in, which need not be BodyBB itself.
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5373 
5374 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5375 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5376 /// UDR combiner function.
5377 static void emitReductionCombiner(CodeGenFunction &CGF,
5378                                   const Expr *ReductionOp) {
5379   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5380     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5381       if (const auto *DRE =
5382               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5383         if (const auto *DRD =
5384                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5385           std::pair<llvm::Function *, llvm::Function *> Reduction =
5386               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5387           RValue Func = RValue::get(Reduction.first);
5388           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5389           CGF.EmitIgnoredExpr(ReductionOp);
5390           return;
5391         }
5392   CGF.EmitIgnoredExpr(ReductionOp);
5393 }
5394 
5395 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5396     SourceLocation Loc, llvm::Type *ArgsElemType,
5397     ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
5398     ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
5399   ASTContext &C = CGM.getContext();
5400 
5401   // void reduction_func(void *LHSArg, void *RHSArg);
5402   FunctionArgList Args;
5403   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5404                            ImplicitParamDecl::Other);
5405   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5406                            ImplicitParamDecl::Other);
5407   Args.push_back(&LHSArg);
5408   Args.push_back(&RHSArg);
5409   const auto &CGFI =
5410       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5411   std::string Name = getName({"omp", "reduction", "reduction_func"});
5412   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5413                                     llvm::GlobalValue::InternalLinkage, Name,
5414                                     &CGM.getModule());
5415   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5416   Fn->setDoesNotRecurse();
5417   CodeGenFunction CGF(CGM);
5418   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5419 
5420   // Dst = (void*[n])(LHSArg);
5421   // Src = (void*[n])(RHSArg);
5422   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5423                   CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5424                   ArgsElemType->getPointerTo()),
5425               ArgsElemType, CGF.getPointerAlign());
5426   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5427                   CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5428                   ArgsElemType->getPointerTo()),
5429               ArgsElemType, CGF.getPointerAlign());
5430 
5431   //  ...
5432   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5433   //  ...
5434   CodeGenFunction::OMPPrivateScope Scope(CGF);
5435   const auto *IPriv = Privates.begin();
5436   unsigned Idx = 0;
5437   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5438     const auto *RHSVar =
5439         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5440     Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
5441     const auto *LHSVar =
5442         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5443     Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
5444     QualType PrivTy = (*IPriv)->getType();
5445     if (PrivTy->isVariablyModifiedType()) {
5446       // Get array size and emit VLA type.
5447       ++Idx;
5448       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5449       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5450       const VariableArrayType *VLA =
5451           CGF.getContext().getAsVariableArrayType(PrivTy);
5452       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5453       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5454           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5455       CGF.EmitVariablyModifiedType(PrivTy);
5456     }
5457   }
5458   Scope.Privatize();
5459   IPriv = Privates.begin();
5460   const auto *ILHS = LHSExprs.begin();
5461   const auto *IRHS = RHSExprs.begin();
5462   for (const Expr *E : ReductionOps) {
5463     if ((*IPriv)->getType()->isArrayType()) {
5464       // Emit reduction for array section.
5465       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5466       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5467       EmitOMPAggregateReduction(
5468           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5469           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5470             emitReductionCombiner(CGF, E);
5471           });
5472     } else {
5473       // Emit reduction for array subscript or single variable.
5474       emitReductionCombiner(CGF, E);
5475     }
5476     ++IPriv;
5477     ++ILHS;
5478     ++IRHS;
5479   }
5480   Scope.ForceCleanup();
5481   CGF.FinishFunction();
5482   return Fn;
5483 }
5484 
5485 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5486                                                   const Expr *ReductionOp,
5487                                                   const Expr *PrivateRef,
5488                                                   const DeclRefExpr *LHS,
5489                                                   const DeclRefExpr *RHS) {
5490   if (PrivateRef->getType()->isArrayType()) {
5491     // Emit reduction for array section.
5492     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5493     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5494     EmitOMPAggregateReduction(
5495         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5496         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5497           emitReductionCombiner(CGF, ReductionOp);
5498         });
5499   } else {
5500     // Emit reduction for array subscript or single variable.
5501     emitReductionCombiner(CGF, ReductionOp);
5502   }
5503 }
5504 
5505 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5506                                     ArrayRef<const Expr *> Privates,
5507                                     ArrayRef<const Expr *> LHSExprs,
5508                                     ArrayRef<const Expr *> RHSExprs,
5509                                     ArrayRef<const Expr *> ReductionOps,
5510                                     ReductionOptionsTy Options) {
5511   if (!CGF.HaveInsertPoint())
5512     return;
5513 
5514   bool WithNowait = Options.WithNowait;
5515   bool SimpleReduction = Options.SimpleReduction;
5516 
5517   // Next code should be emitted for reduction:
5518   //
5519   // static kmp_critical_name lock = { 0 };
5520   //
5521   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5522   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5523   //  ...
5524   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5525   //  *(Type<n>-1*)rhs[<n>-1]);
5526   // }
5527   //
5528   // ...
5529   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5530   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5531   // RedList, reduce_func, &<lock>)) {
5532   // case 1:
5533   //  ...
5534   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5535   //  ...
5536   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5537   // break;
5538   // case 2:
5539   //  ...
5540   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5541   //  ...
5542   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5543   // break;
5544   // default:;
5545   // }
5546   //
5547   // if SimpleReduction is true, only the next code is generated:
5548   //  ...
5549   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5550   //  ...
5551 
5552   ASTContext &C = CGM.getContext();
5553 
5554   if (SimpleReduction) {
5555     CodeGenFunction::RunCleanupsScope Scope(CGF);
5556     const auto *IPriv = Privates.begin();
5557     const auto *ILHS = LHSExprs.begin();
5558     const auto *IRHS = RHSExprs.begin();
5559     for (const Expr *E : ReductionOps) {
5560       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5561                                   cast<DeclRefExpr>(*IRHS));
5562       ++IPriv;
5563       ++ILHS;
5564       ++IRHS;
5565     }
5566     return;
5567   }
5568 
5569   // 1. Build a list of reduction variables.
5570   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5571   auto Size = RHSExprs.size();
5572   for (const Expr *E : Privates) {
5573     if (E->getType()->isVariablyModifiedType())
5574       // Reserve place for array size.
5575       ++Size;
5576   }
5577   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5578   QualType ReductionArrayTy =
5579       C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
5580                              /*IndexTypeQuals=*/0);
5581   Address ReductionList =
5582       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5583   const auto *IPriv = Privates.begin();
5584   unsigned Idx = 0;
5585   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5586     Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5587     CGF.Builder.CreateStore(
5588         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5589             CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5590         Elem);
5591     if ((*IPriv)->getType()->isVariablyModifiedType()) {
5592       // Store array size.
5593       ++Idx;
5594       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5595       llvm::Value *Size = CGF.Builder.CreateIntCast(
5596           CGF.getVLASize(
5597                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5598               .NumElts,
5599           CGF.SizeTy, /*isSigned=*/false);
5600       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5601                               Elem);
5602     }
5603   }
5604 
5605   // 2. Emit reduce_func().
5606   llvm::Function *ReductionFn =
5607       emitReductionFunction(Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5608                             Privates, LHSExprs, RHSExprs, ReductionOps);
5609 
5610   // 3. Create static kmp_critical_name lock = { 0 };
5611   std::string Name = getName({"reduction"});
5612   llvm::Value *Lock = getCriticalRegionLock(Name);
5613 
5614   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5615   // RedList, reduce_func, &<lock>);
5616   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5617   llvm::Value *ThreadId = getThreadID(CGF, Loc);
5618   llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5619   llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5620       ReductionList.getPointer(), CGF.VoidPtrTy);
5621   llvm::Value *Args[] = {
5622       IdentTLoc,                             // ident_t *<loc>
5623       ThreadId,                              // i32 <gtid>
5624       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5625       ReductionArrayTySize,                  // size_type sizeof(RedList)
5626       RL,                                    // void *RedList
5627       ReductionFn, // void (*) (void *, void *) <reduce_func>
5628       Lock         // kmp_critical_name *&<lock>
5629   };
5630   llvm::Value *Res = CGF.EmitRuntimeCall(
5631       OMPBuilder.getOrCreateRuntimeFunction(
5632           CGM.getModule(),
5633           WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5634       Args);
5635 
5636   // 5. Build switch(res)
5637   llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5638   llvm::SwitchInst *SwInst =
5639       CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5640 
5641   // 6. Build case 1:
5642   //  ...
5643   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5644   //  ...
5645   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5646   // break;
5647   llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5648   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5649   CGF.EmitBlock(Case1BB);
5650 
5651   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5652   llvm::Value *EndArgs[] = {
5653       IdentTLoc, // ident_t *<loc>
5654       ThreadId,  // i32 <gtid>
5655       Lock       // kmp_critical_name *&<lock>
5656   };
5657   auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5658                        CodeGenFunction &CGF, PrePostActionTy &Action) {
5659     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5660     const auto *IPriv = Privates.begin();
5661     const auto *ILHS = LHSExprs.begin();
5662     const auto *IRHS = RHSExprs.begin();
5663     for (const Expr *E : ReductionOps) {
5664       RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5665                                      cast<DeclRefExpr>(*IRHS));
5666       ++IPriv;
5667       ++ILHS;
5668       ++IRHS;
5669     }
5670   };
5671   RegionCodeGenTy RCG(CodeGen);
5672   CommonActionTy Action(
5673       nullptr, llvm::None,
5674       OMPBuilder.getOrCreateRuntimeFunction(
5675           CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5676                                       : OMPRTL___kmpc_end_reduce),
5677       EndArgs);
5678   RCG.setAction(Action);
5679   RCG(CGF);
5680 
5681   CGF.EmitBranch(DefaultBB);
5682 
5683   // 7. Build case 2:
5684   //  ...
5685   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5686   //  ...
5687   // break;
5688   llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5689   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5690   CGF.EmitBlock(Case2BB);
5691 
5692   auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5693                              CodeGenFunction &CGF, PrePostActionTy &Action) {
5694     const auto *ILHS = LHSExprs.begin();
5695     const auto *IRHS = RHSExprs.begin();
5696     const auto *IPriv = Privates.begin();
5697     for (const Expr *E : ReductionOps) {
5698       const Expr *XExpr = nullptr;
5699       const Expr *EExpr = nullptr;
5700       const Expr *UpExpr = nullptr;
5701       BinaryOperatorKind BO = BO_Comma;
5702       if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5703         if (BO->getOpcode() == BO_Assign) {
5704           XExpr = BO->getLHS();
5705           UpExpr = BO->getRHS();
5706         }
5707       }
5708       // Try to emit update expression as a simple atomic.
5709       const Expr *RHSExpr = UpExpr;
5710       if (RHSExpr) {
5711         // Analyze RHS part of the whole expression.
5712         if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5713                 RHSExpr->IgnoreParenImpCasts())) {
5714           // If this is a conditional operator, analyze its condition for
5715           // min/max reduction operator.
5716           RHSExpr = ACO->getCond();
5717         }
5718         if (const auto *BORHS =
5719                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5720           EExpr = BORHS->getRHS();
5721           BO = BORHS->getOpcode();
5722         }
5723       }
5724       if (XExpr) {
5725         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5726         auto &&AtomicRedGen = [BO, VD,
5727                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
5728                                     const Expr *EExpr, const Expr *UpExpr) {
5729           LValue X = CGF.EmitLValue(XExpr);
5730           RValue E;
5731           if (EExpr)
5732             E = CGF.EmitAnyExpr(EExpr);
5733           CGF.EmitOMPAtomicSimpleUpdateExpr(
5734               X, E, BO, /*IsXLHSInRHSPart=*/true,
5735               llvm::AtomicOrdering::Monotonic, Loc,
5736               [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5737                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5738                 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5739                 CGF.emitOMPSimpleStore(
5740                     CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5741                     VD->getType().getNonReferenceType(), Loc);
5742                 PrivateScope.addPrivate(VD, LHSTemp);
5743                 (void)PrivateScope.Privatize();
5744                 return CGF.EmitAnyExpr(UpExpr);
5745               });
5746         };
5747         if ((*IPriv)->getType()->isArrayType()) {
5748           // Emit atomic reduction for array section.
5749           const auto *RHSVar =
5750               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5751           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5752                                     AtomicRedGen, XExpr, EExpr, UpExpr);
5753         } else {
5754           // Emit atomic reduction for array subscript or single variable.
5755           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5756         }
5757       } else {
5758         // Emit as a critical region.
5759         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5760                                            const Expr *, const Expr *) {
5761           CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5762           std::string Name = RT.getName({"atomic_reduction"});
5763           RT.emitCriticalRegion(
5764               CGF, Name,
5765               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5766                 Action.Enter(CGF);
5767                 emitReductionCombiner(CGF, E);
5768               },
5769               Loc);
5770         };
5771         if ((*IPriv)->getType()->isArrayType()) {
5772           const auto *LHSVar =
5773               cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5774           const auto *RHSVar =
5775               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5776           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5777                                     CritRedGen);
5778         } else {
5779           CritRedGen(CGF, nullptr, nullptr, nullptr);
5780         }
5781       }
5782       ++ILHS;
5783       ++IRHS;
5784       ++IPriv;
5785     }
5786   };
5787   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5788   if (!WithNowait) {
5789     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5790     llvm::Value *EndArgs[] = {
5791         IdentTLoc, // ident_t *<loc>
5792         ThreadId,  // i32 <gtid>
5793         Lock       // kmp_critical_name *&<lock>
5794     };
5795     CommonActionTy Action(nullptr, llvm::None,
5796                           OMPBuilder.getOrCreateRuntimeFunction(
5797                               CGM.getModule(), OMPRTL___kmpc_end_reduce),
5798                           EndArgs);
5799     AtomicRCG.setAction(Action);
5800     AtomicRCG(CGF);
5801   } else {
5802     AtomicRCG(CGF);
5803   }
5804 
5805   CGF.EmitBranch(DefaultBB);
5806   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5807 }
5808 
5809 /// Generates unique name for artificial threadprivate variables.
5810 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5811 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5812                                       const Expr *Ref) {
5813   SmallString<256> Buffer;
5814   llvm::raw_svector_ostream Out(Buffer);
5815   const clang::DeclRefExpr *DE;
5816   const VarDecl *D = ::getBaseDecl(Ref, DE);
5817   if (!D)
5818     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5819   D = D->getCanonicalDecl();
5820   std::string Name = CGM.getOpenMPRuntime().getName(
5821       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5822   Out << Prefix << Name << "_"
5823       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5824   return std::string(Out.str());
5825 }
5826 
5827 /// Emits reduction initializer function:
5828 /// \code
5829 /// void @.red_init(void* %arg, void* %orig) {
5830 /// %0 = bitcast void* %arg to <type>*
5831 /// store <type> <init>, <type>* %0
5832 /// ret void
5833 /// }
5834 /// \endcode
5835 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5836                                            SourceLocation Loc,
5837                                            ReductionCodeGen &RCG, unsigned N) {
5838   ASTContext &C = CGM.getContext();
5839   QualType VoidPtrTy = C.VoidPtrTy;
5840   VoidPtrTy.addRestrict();
5841   FunctionArgList Args;
5842   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5843                           ImplicitParamDecl::Other);
5844   ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5845                               ImplicitParamDecl::Other);
5846   Args.emplace_back(&Param);
5847   Args.emplace_back(&ParamOrig);
5848   const auto &FnInfo =
5849       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5850   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5851   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5852   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5853                                     Name, &CGM.getModule());
5854   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5855   Fn->setDoesNotRecurse();
5856   CodeGenFunction CGF(CGM);
5857   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5858   QualType PrivateType = RCG.getPrivateType(N);
5859   Address PrivateAddr = CGF.EmitLoadOfPointer(
5860       CGF.Builder.CreateElementBitCast(
5861           CGF.GetAddrOfLocalVar(&Param),
5862           CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
5863       C.getPointerType(PrivateType)->castAs<PointerType>());
5864   llvm::Value *Size = nullptr;
5865   // If the size of the reduction item is non-constant, load it from global
5866   // threadprivate variable.
5867   if (RCG.getSizes(N).second) {
5868     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5869         CGF, CGM.getContext().getSizeType(),
5870         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5871     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5872                                 CGM.getContext().getSizeType(), Loc);
5873   }
5874   RCG.emitAggregateType(CGF, N, Size);
5875   Address OrigAddr = Address::invalid();
5876   // If initializer uses initializer from declare reduction construct, emit a
5877   // pointer to the address of the original reduction item (reuired by reduction
5878   // initializer)
5879   if (RCG.usesReductionInitializer(N)) {
5880     Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5881     OrigAddr = CGF.EmitLoadOfPointer(
5882         SharedAddr,
5883         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5884   }
5885   // Emit the initializer:
5886   // %0 = bitcast void* %arg to <type>*
5887   // store <type> <init>, <type>* %0
5888   RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5889                          [](CodeGenFunction &) { return false; });
5890   CGF.FinishFunction();
5891   return Fn;
5892 }
5893 
5894 /// Emits reduction combiner function:
5895 /// \code
5896 /// void @.red_comb(void* %arg0, void* %arg1) {
5897 /// %lhs = bitcast void* %arg0 to <type>*
5898 /// %rhs = bitcast void* %arg1 to <type>*
5899 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5900 /// store <type> %2, <type>* %lhs
5901 /// ret void
5902 /// }
5903 /// \endcode
5904 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5905                                            SourceLocation Loc,
5906                                            ReductionCodeGen &RCG, unsigned N,
5907                                            const Expr *ReductionOp,
5908                                            const Expr *LHS, const Expr *RHS,
5909                                            const Expr *PrivateRef) {
5910   ASTContext &C = CGM.getContext();
5911   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5912   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5913   FunctionArgList Args;
5914   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5915                                C.VoidPtrTy, ImplicitParamDecl::Other);
5916   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5917                             ImplicitParamDecl::Other);
5918   Args.emplace_back(&ParamInOut);
5919   Args.emplace_back(&ParamIn);
5920   const auto &FnInfo =
5921       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5922   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5923   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5924   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5925                                     Name, &CGM.getModule());
5926   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5927   Fn->setDoesNotRecurse();
5928   CodeGenFunction CGF(CGM);
5929   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5930   llvm::Value *Size = nullptr;
5931   // If the size of the reduction item is non-constant, load it from global
5932   // threadprivate variable.
5933   if (RCG.getSizes(N).second) {
5934     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5935         CGF, CGM.getContext().getSizeType(),
5936         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5937     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5938                                 CGM.getContext().getSizeType(), Loc);
5939   }
5940   RCG.emitAggregateType(CGF, N, Size);
5941   // Remap lhs and rhs variables to the addresses of the function arguments.
5942   // %lhs = bitcast void* %arg0 to <type>*
5943   // %rhs = bitcast void* %arg1 to <type>*
5944   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5945   PrivateScope.addPrivate(
5946       LHSVD,
5947       // Pull out the pointer to the variable.
5948       CGF.EmitLoadOfPointer(
5949           CGF.Builder.CreateElementBitCast(
5950               CGF.GetAddrOfLocalVar(&ParamInOut),
5951               CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
5952           C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5953   PrivateScope.addPrivate(
5954       RHSVD,
5955       // Pull out the pointer to the variable.
5956       CGF.EmitLoadOfPointer(
5957           CGF.Builder.CreateElementBitCast(
5958             CGF.GetAddrOfLocalVar(&ParamIn),
5959             CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
5960           C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5961   PrivateScope.Privatize();
5962   // Emit the combiner body:
5963   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5964   // store <type> %2, <type>* %lhs
5965   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5966       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5967       cast<DeclRefExpr>(RHS));
5968   CGF.FinishFunction();
5969   return Fn;
5970 }
5971 
5972 /// Emits reduction finalizer function:
5973 /// \code
5974 /// void @.red_fini(void* %arg) {
5975 /// %0 = bitcast void* %arg to <type>*
5976 /// <destroy>(<type>* %0)
5977 /// ret void
5978 /// }
5979 /// \endcode
5980 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5981                                            SourceLocation Loc,
5982                                            ReductionCodeGen &RCG, unsigned N) {
5983   if (!RCG.needCleanups(N))
5984     return nullptr;
5985   ASTContext &C = CGM.getContext();
5986   FunctionArgList Args;
5987   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5988                           ImplicitParamDecl::Other);
5989   Args.emplace_back(&Param);
5990   const auto &FnInfo =
5991       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5992   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5993   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5994   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5995                                     Name, &CGM.getModule());
5996   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5997   Fn->setDoesNotRecurse();
5998   CodeGenFunction CGF(CGM);
5999   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6000   Address PrivateAddr = CGF.EmitLoadOfPointer(
6001       CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
6002   llvm::Value *Size = nullptr;
6003   // If the size of the reduction item is non-constant, load it from global
6004   // threadprivate variable.
6005   if (RCG.getSizes(N).second) {
6006     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6007         CGF, CGM.getContext().getSizeType(),
6008         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6009     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6010                                 CGM.getContext().getSizeType(), Loc);
6011   }
6012   RCG.emitAggregateType(CGF, N, Size);
6013   // Emit the finalizer body:
6014   // <destroy>(<type>* %0)
6015   RCG.emitCleanups(CGF, N, PrivateAddr);
6016   CGF.FinishFunction(Loc);
6017   return Fn;
6018 }
6019 
/// Emits the setup code for task reductions: fills a temporary array of
/// kmp_taskred_input_t descriptors (one per entry of Data.ReductionVars) and
/// hands it to the runtime.
///
/// \param LHSExprs Per-item expressions forwarded to the generated combiner
/// function.
/// \param RHSExprs Per-item expressions forwarded to the generated combiner
/// function.
/// \param Data Supplies the reduction variables, originals, private copies
/// and reduction operations, plus the task-modifier/worksharing flags.
/// \returns nullptr if there is no insertion point or there are no reduction
/// items; otherwise the result of the emitted __kmpc_taskred_modifier_init or
/// __kmpc_taskred_init call.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  // Nothing to emit without an insertion point or without reduction items.
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  // The fields are added in the same order as in the struct sketched above.
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  // One descriptor per reduction item.
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_taskred_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    // The shared and original lvalues are emitted here and read back below
    // through getSharedLValue()/getOrigLValue().
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    // Emit the item's type so that its sizes can be queried via getSizes().
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    // A non-null second size component selects delayed creation.
    bool DelayedCreation = !!SizeVal;
    // ElemLVal.reduce_size = size of the item in chars, as size_t;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    // A null finalizer function is stored as a null pointer.
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6148 
6149 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6150                                             SourceLocation Loc,
6151                                             bool IsWorksharingReduction) {
6152   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6153   // is_ws, int num, void *data);
6154   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6155   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6156                                                 CGM.IntTy, /*isSigned=*/true);
6157   llvm::Value *Args[] = {IdentTLoc, GTid,
6158                          llvm::ConstantInt::get(CGM.IntTy,
6159                                                 IsWorksharingReduction ? 1 : 0,
6160                                                 /*isSigned=*/true)};
6161   (void)CGF.EmitRuntimeCall(
6162       OMPBuilder.getOrCreateRuntimeFunction(
6163           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6164       Args);
6165 }
6166 
6167 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6168                                               SourceLocation Loc,
6169                                               ReductionCodeGen &RCG,
6170                                               unsigned N) {
6171   auto Sizes = RCG.getSizes(N);
6172   // Emit threadprivate global variable if the type is non-constant
6173   // (Sizes.second = nullptr).
6174   if (Sizes.second) {
6175     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6176                                                      /*isSigned=*/false);
6177     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6178         CGF, CGM.getContext().getSizeType(),
6179         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6180     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6181   }
6182 }
6183 
6184 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6185                                               SourceLocation Loc,
6186                                               llvm::Value *ReductionsPtr,
6187                                               LValue SharedLVal) {
6188   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6189   // *d);
6190   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6191                                                    CGM.IntTy,
6192                                                    /*isSigned=*/true),
6193                          ReductionsPtr,
6194                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6195                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6196   return Address(
6197       CGF.EmitRuntimeCall(
6198           OMPBuilder.getOrCreateRuntimeFunction(
6199               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6200           Args),
6201       CGF.Int8Ty, SharedLVal.getAlignment());
6202 }
6203 
6204 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
6205                                        const OMPTaskDataTy &Data) {
6206   if (!CGF.HaveInsertPoint())
6207     return;
6208 
6209   if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
6210     // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
6211     OMPBuilder.createTaskwait(CGF.Builder);
6212   } else {
6213     llvm::Value *ThreadID = getThreadID(CGF, Loc);
6214     llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
6215     auto &M = CGM.getModule();
6216     Address DependenciesArray = Address::invalid();
6217     llvm::Value *NumOfElements;
6218     std::tie(NumOfElements, DependenciesArray) =
6219         emitDependClause(CGF, Data.Dependences, Loc);
6220     llvm::Value *DepWaitTaskArgs[6];
6221     if (!Data.Dependences.empty()) {
6222       DepWaitTaskArgs[0] = UpLoc;
6223       DepWaitTaskArgs[1] = ThreadID;
6224       DepWaitTaskArgs[2] = NumOfElements;
6225       DepWaitTaskArgs[3] = DependenciesArray.getPointer();
6226       DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
6227       DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6228 
6229       CodeGenFunction::RunCleanupsScope LocalScope(CGF);
6230 
6231       // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
6232       // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
6233       // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
6234       // is specified.
6235       CGF.EmitRuntimeCall(
6236           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
6237           DepWaitTaskArgs);
6238 
6239     } else {
6240 
6241       // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6242       // global_tid);
6243       llvm::Value *Args[] = {UpLoc, ThreadID};
6244       // Ignore return result until untied tasks are supported.
6245       CGF.EmitRuntimeCall(
6246           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
6247           Args);
6248     }
6249   }
6250 
6251   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6252     Region->emitUntiedSwitch(CGF);
6253 }
6254 
6255 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6256                                            OpenMPDirectiveKind InnerKind,
6257                                            const RegionCodeGenTy &CodeGen,
6258                                            bool HasCancel) {
6259   if (!CGF.HaveInsertPoint())
6260     return;
6261   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6262                                  InnerKind != OMPD_critical &&
6263                                      InnerKind != OMPD_master &&
6264                                      InnerKind != OMPD_masked);
6265   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6266 }
6267 
namespace {
/// Cancellation kinds; the value produced by getCancellationKind() is passed
/// as the cncl_kind argument of the __kmpc_cancel and
/// __kmpc_cancellationpoint runtime calls.
enum RTCancelKind {
  /// Initial value only; never returned by getCancellationKind().
  CancelNoreq = 0,
  /// Selected for OMPD_parallel.
  CancelParallel = 1,
  /// Selected for OMPD_for.
  CancelLoop = 2,
  /// Selected for OMPD_sections.
  CancelSections = 3,
  /// Selected for OMPD_taskgroup.
  CancelTaskgroup = 4
};
} // anonymous namespace
6277 
6278 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6279   RTCancelKind CancelKind = CancelNoreq;
6280   if (CancelRegion == OMPD_parallel)
6281     CancelKind = CancelParallel;
6282   else if (CancelRegion == OMPD_for)
6283     CancelKind = CancelLoop;
6284   else if (CancelRegion == OMPD_sections)
6285     CancelKind = CancelSections;
6286   else {
6287     assert(CancelRegion == OMPD_taskgroup);
6288     CancelKind = CancelTaskgroup;
6289   }
6290   return CancelKind;
6291 }
6292 
6293 void CGOpenMPRuntime::emitCancellationPointCall(
6294     CodeGenFunction &CGF, SourceLocation Loc,
6295     OpenMPDirectiveKind CancelRegion) {
6296   if (!CGF.HaveInsertPoint())
6297     return;
6298   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6299   // global_tid, kmp_int32 cncl_kind);
6300   if (auto *OMPRegionInfo =
6301           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6302     // For 'cancellation point taskgroup', the task region info may not have a
6303     // cancel. This may instead happen in another adjacent task.
6304     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6305       llvm::Value *Args[] = {
6306           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6307           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6308       // Ignore return result until untied tasks are supported.
6309       llvm::Value *Result = CGF.EmitRuntimeCall(
6310           OMPBuilder.getOrCreateRuntimeFunction(
6311               CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6312           Args);
6313       // if (__kmpc_cancellationpoint()) {
6314       //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6315       //   exit from construct;
6316       // }
6317       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6318       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6319       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6320       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6321       CGF.EmitBlock(ExitBB);
6322       if (CancelRegion == OMPD_parallel)
6323         emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6324       // exit from construct;
6325       CodeGenFunction::JumpDest CancelDest =
6326           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6327       CGF.EmitBranchThroughCleanup(CancelDest);
6328       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6329     }
6330   }
6331 }
6332 
6333 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6334                                      const Expr *IfCond,
6335                                      OpenMPDirectiveKind CancelRegion) {
6336   if (!CGF.HaveInsertPoint())
6337     return;
6338   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6339   // kmp_int32 cncl_kind);
6340   auto &M = CGM.getModule();
6341   if (auto *OMPRegionInfo =
6342           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6343     auto &&ThenGen = [this, &M, Loc, CancelRegion,
6344                       OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6345       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6346       llvm::Value *Args[] = {
6347           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6348           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6349       // Ignore return result until untied tasks are supported.
6350       llvm::Value *Result = CGF.EmitRuntimeCall(
6351           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6352       // if (__kmpc_cancel()) {
6353       //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6354       //   exit from construct;
6355       // }
6356       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6357       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6358       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6359       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6360       CGF.EmitBlock(ExitBB);
6361       if (CancelRegion == OMPD_parallel)
6362         RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6363       // exit from construct;
6364       CodeGenFunction::JumpDest CancelDest =
6365           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6366       CGF.EmitBranchThroughCleanup(CancelDest);
6367       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6368     };
6369     if (IfCond) {
6370       emitIfClause(CGF, IfCond, ThenGen,
6371                    [](CodeGenFunction &, PrePostActionTy &) {});
6372     } else {
6373       RegionCodeGenTy ThenRCG(ThenGen);
6374       ThenRCG(CGF);
6375     }
6376   }
6377 }
6378 
6379 namespace {
6380 /// Cleanup action for uses_allocators support.
6381 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6382   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6383 
6384 public:
6385   OMPUsesAllocatorsActionTy(
6386       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6387       : Allocators(Allocators) {}
6388   void Enter(CodeGenFunction &CGF) override {
6389     if (!CGF.HaveInsertPoint())
6390       return;
6391     for (const auto &AllocatorData : Allocators) {
6392       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6393           CGF, AllocatorData.first, AllocatorData.second);
6394     }
6395   }
6396   void Exit(CodeGenFunction &CGF) override {
6397     if (!CGF.HaveInsertPoint())
6398       return;
6399     for (const auto &AllocatorData : Allocators) {
6400       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6401                                                         AllocatorData.first);
6402     }
6403   }
6404 };
6405 } // namespace
6406 
6407 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6408     const OMPExecutableDirective &D, StringRef ParentName,
6409     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6410     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6411   assert(!ParentName.empty() && "Invalid target region parent name!");
6412   HasEmittedTargetRegion = true;
6413   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6414   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6415     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6416       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6417       if (!D.AllocatorTraits)
6418         continue;
6419       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6420     }
6421   }
6422   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6423   CodeGen.setAction(UsesAllocatorAction);
6424   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6425                                    IsOffloadEntry, CodeGen);
6426 }
6427 
6428 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6429                                              const Expr *Allocator,
6430                                              const Expr *AllocatorTraits) {
6431   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6432   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6433   // Use default memspace handle.
6434   llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6435   llvm::Value *NumTraits = llvm::ConstantInt::get(
6436       CGF.IntTy, cast<ConstantArrayType>(
6437                      AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6438                      ->getSize()
6439                      .getLimitedValue());
6440   LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6441   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6442       AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
6443   AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6444                                            AllocatorTraitsLVal.getBaseInfo(),
6445                                            AllocatorTraitsLVal.getTBAAInfo());
6446   llvm::Value *Traits =
6447       CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());
6448 
6449   llvm::Value *AllocatorVal =
6450       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6451                               CGM.getModule(), OMPRTL___kmpc_init_allocator),
6452                           {ThreadId, MemSpaceHandle, NumTraits, Traits});
6453   // Store to allocator.
6454   CGF.EmitVarDecl(*cast<VarDecl>(
6455       cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6456   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6457   AllocatorVal =
6458       CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6459                                Allocator->getType(), Allocator->getExprLoc());
6460   CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6461 }
6462 
6463 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6464                                              const Expr *Allocator) {
6465   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6466   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6467   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6468   llvm::Value *AllocatorVal =
6469       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6470   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6471                                           CGF.getContext().VoidPtrTy,
6472                                           Allocator->getExprLoc());
6473   (void)CGF.EmitRuntimeCall(
6474       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6475                                             OMPRTL___kmpc_destroy_allocator),
6476       {ThreadId, AllocatorVal});
6477 }
6478 
/// Generates the outlined function for the target region of \p D and, when
/// \p IsOffloadEntry is set, registers the region as an offload entry.
///
/// \param D Target directive; its OMPD_target captured statement is outlined.
/// \param ParentName Name embedded in the generated entry function name.
/// \param OutlinedFn [out] Set to the generated function. It is left untouched
/// when compiling for the host with OpenMPOffloadMandatory, where no outlined
/// function is built.
/// \param OutlinedFnID [out] Set to the region ID; written only when
/// \p IsOffloadEntry is true.
/// \param IsOffloadEntry Whether the region has to be registered.
/// \param CodeGen Code generator for the region body.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  // The outlined function is always built for the device; on the host it is
  // skipped only when offloading is mandatory.
  const bool BuildOutlinedFn = CGM.getLangOpts().OpenMPIsDevice ||
                               !CGM.getLangOpts().OpenMPOffloadMandatory;
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    // Device and file IDs are printed in hexadecimal.
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // The outlined function is generated with a separate CodeGenFunction whose
  // captured-statement info is the target region info for the duration of this
  // function.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  if (BuildOutlinedFn)
    OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    // On AMDGCN the entry uses the AMDGPU kernel calling convention.
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // On the host the ID is a dedicated constant i8 global named after the
    // entry function.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // If we do not allow host fallback we still need a named address to use.
  llvm::Constant *TargetRegionEntryAddr = OutlinedFn;
  if (!BuildOutlinedFn) {
    assert(!CGM.getModule().getGlobalVariable(EntryFnName, true) &&
           "Named kernel already exists?");
    TargetRegionEntryAddr = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::InternalLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), EntryFnName);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, TargetRegionEntryAddr, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);

  // Add NumTeams and ThreadLimit attributes to the outlined GPU function
  // The attributes are only added for positive values and only when an
  // outlined function exists.
  int32_t DefaultValTeams = -1;
  getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
  if (DefaultValTeams > 0 && OutlinedFn) {
    OutlinedFn->addFnAttr("omp_target_num_teams",
                          std::to_string(DefaultValTeams));
  }
  int32_t DefaultValThreads = -1;
  getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
  if (DefaultValThreads > 0 && OutlinedFn) {
    OutlinedFn->addFnAttr("omp_target_thread_limit",
                          std::to_string(DefaultValThreads));
  }

  // Let the target add its own attributes; no declaration is passed.
  if (BuildOutlinedFn)
    CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
}
6578 
6579 /// Checks if the expression is constant or does not have non-trivial function
6580 /// calls.
6581 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6582   // We can skip constant expressions.
6583   // We can skip expressions with trivial calls or simple expressions.
6584   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6585           !E->hasNonTrivialCall(Ctx)) &&
6586          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6587 }
6588 
6589 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6590                                                     const Stmt *Body) {
6591   const Stmt *Child = Body->IgnoreContainers();
6592   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6593     Child = nullptr;
6594     for (const Stmt *S : C->body()) {
6595       if (const auto *E = dyn_cast<Expr>(S)) {
6596         if (isTrivial(Ctx, E))
6597           continue;
6598       }
6599       // Some of the statements can be ignored.
6600       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6601           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6602         continue;
6603       // Analyze declarations.
6604       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6605         if (llvm::all_of(DS->decls(), [](const Decl *D) {
6606               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6607                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6608                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6609                   isa<UsingDirectiveDecl>(D) ||
6610                   isa<OMPDeclareReductionDecl>(D) ||
6611                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6612                 return true;
6613               const auto *VD = dyn_cast<VarDecl>(D);
6614               if (!VD)
6615                 return false;
6616               return VD->hasGlobalStorage() || !VD->isUsed();
6617             }))
6618           continue;
6619       }
6620       // Found multiple children - cannot get the one child only.
6621       if (Child)
6622         return nullptr;
6623       Child = S;
6624     }
6625     if (Child)
6626       Child = Child->IgnoreContainers();
6627   }
6628   return Child;
6629 }
6630 
6631 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6632     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6633     int32_t &DefaultVal) {
6634 
6635   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6636   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6637          "Expected target-based executable directive.");
6638   switch (DirectiveKind) {
6639   case OMPD_target: {
6640     const auto *CS = D.getInnermostCapturedStmt();
6641     const auto *Body =
6642         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6643     const Stmt *ChildStmt =
6644         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6645     if (const auto *NestedDir =
6646             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6647       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6648         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6649           const Expr *NumTeams =
6650               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6651           if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6652             if (auto Constant =
6653                     NumTeams->getIntegerConstantExpr(CGF.getContext()))
6654               DefaultVal = Constant->getExtValue();
6655           return NumTeams;
6656         }
6657         DefaultVal = 0;
6658         return nullptr;
6659       }
6660       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6661           isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
6662         DefaultVal = 1;
6663         return nullptr;
6664       }
6665       DefaultVal = 1;
6666       return nullptr;
6667     }
6668     // A value of -1 is used to check if we need to emit no teams region
6669     DefaultVal = -1;
6670     return nullptr;
6671   }
6672   case OMPD_target_teams:
6673   case OMPD_target_teams_distribute:
6674   case OMPD_target_teams_distribute_simd:
6675   case OMPD_target_teams_distribute_parallel_for:
6676   case OMPD_target_teams_distribute_parallel_for_simd: {
6677     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6678       const Expr *NumTeams =
6679           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6680       if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6681         if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6682           DefaultVal = Constant->getExtValue();
6683       return NumTeams;
6684     }
6685     DefaultVal = 0;
6686     return nullptr;
6687   }
6688   case OMPD_target_parallel:
6689   case OMPD_target_parallel_for:
6690   case OMPD_target_parallel_for_simd:
6691   case OMPD_target_simd:
6692     DefaultVal = 1;
6693     return nullptr;
6694   case OMPD_parallel:
6695   case OMPD_for:
6696   case OMPD_parallel_for:
6697   case OMPD_parallel_master:
6698   case OMPD_parallel_sections:
6699   case OMPD_for_simd:
6700   case OMPD_parallel_for_simd:
6701   case OMPD_cancel:
6702   case OMPD_cancellation_point:
6703   case OMPD_ordered:
6704   case OMPD_threadprivate:
6705   case OMPD_allocate:
6706   case OMPD_task:
6707   case OMPD_simd:
6708   case OMPD_tile:
6709   case OMPD_unroll:
6710   case OMPD_sections:
6711   case OMPD_section:
6712   case OMPD_single:
6713   case OMPD_master:
6714   case OMPD_critical:
6715   case OMPD_taskyield:
6716   case OMPD_barrier:
6717   case OMPD_taskwait:
6718   case OMPD_taskgroup:
6719   case OMPD_atomic:
6720   case OMPD_flush:
6721   case OMPD_depobj:
6722   case OMPD_scan:
6723   case OMPD_teams:
6724   case OMPD_target_data:
6725   case OMPD_target_exit_data:
6726   case OMPD_target_enter_data:
6727   case OMPD_distribute:
6728   case OMPD_distribute_simd:
6729   case OMPD_distribute_parallel_for:
6730   case OMPD_distribute_parallel_for_simd:
6731   case OMPD_teams_distribute:
6732   case OMPD_teams_distribute_simd:
6733   case OMPD_teams_distribute_parallel_for:
6734   case OMPD_teams_distribute_parallel_for_simd:
6735   case OMPD_target_update:
6736   case OMPD_declare_simd:
6737   case OMPD_declare_variant:
6738   case OMPD_begin_declare_variant:
6739   case OMPD_end_declare_variant:
6740   case OMPD_declare_target:
6741   case OMPD_end_declare_target:
6742   case OMPD_declare_reduction:
6743   case OMPD_declare_mapper:
6744   case OMPD_taskloop:
6745   case OMPD_taskloop_simd:
6746   case OMPD_master_taskloop:
6747   case OMPD_master_taskloop_simd:
6748   case OMPD_parallel_master_taskloop:
6749   case OMPD_parallel_master_taskloop_simd:
6750   case OMPD_requires:
6751   case OMPD_metadirective:
6752   case OMPD_unknown:
6753     break;
6754   default:
6755     break;
6756   }
6757   llvm_unreachable("Unexpected directive kind.");
6758 }
6759 
6760 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6761     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6762   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6763          "Clauses associated with the teams directive expected to be emitted "
6764          "only for the host!");
6765   CGBuilderTy &Bld = CGF.Builder;
6766   int32_t DefaultNT = -1;
6767   const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
6768   if (NumTeams != nullptr) {
6769     OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6770 
6771     switch (DirectiveKind) {
6772     case OMPD_target: {
6773       const auto *CS = D.getInnermostCapturedStmt();
6774       CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6775       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6776       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6777                                                   /*IgnoreResultAssign*/ true);
6778       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6779                              /*isSigned=*/true);
6780     }
6781     case OMPD_target_teams:
6782     case OMPD_target_teams_distribute:
6783     case OMPD_target_teams_distribute_simd:
6784     case OMPD_target_teams_distribute_parallel_for:
6785     case OMPD_target_teams_distribute_parallel_for_simd: {
6786       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6787       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6788                                                   /*IgnoreResultAssign*/ true);
6789       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6790                              /*isSigned=*/true);
6791     }
6792     default:
6793       break;
6794     }
6795   } else if (DefaultNT == -1) {
6796     return nullptr;
6797   }
6798 
6799   return Bld.getInt32(DefaultNT);
6800 }
6801 
6802 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6803                                   llvm::Value *DefaultThreadLimitVal) {
6804   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6805       CGF.getContext(), CS->getCapturedStmt());
6806   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6807     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6808       llvm::Value *NumThreads = nullptr;
6809       llvm::Value *CondVal = nullptr;
6810       // Handle if clause. If if clause present, the number of threads is
6811       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6812       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6813         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6814         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6815         const OMPIfClause *IfClause = nullptr;
6816         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6817           if (C->getNameModifier() == OMPD_unknown ||
6818               C->getNameModifier() == OMPD_parallel) {
6819             IfClause = C;
6820             break;
6821           }
6822         }
6823         if (IfClause) {
6824           const Expr *Cond = IfClause->getCondition();
6825           bool Result;
6826           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6827             if (!Result)
6828               return CGF.Builder.getInt32(1);
6829           } else {
6830             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6831             if (const auto *PreInit =
6832                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6833               for (const auto *I : PreInit->decls()) {
6834                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6835                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6836                 } else {
6837                   CodeGenFunction::AutoVarEmission Emission =
6838                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6839                   CGF.EmitAutoVarCleanups(Emission);
6840                 }
6841               }
6842             }
6843             CondVal = CGF.EvaluateExprAsBool(Cond);
6844           }
6845         }
6846       }
6847       // Check the value of num_threads clause iff if clause was not specified
6848       // or is not evaluated to false.
6849       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6850         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6851         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6852         const auto *NumThreadsClause =
6853             Dir->getSingleClause<OMPNumThreadsClause>();
6854         CodeGenFunction::LexicalScope Scope(
6855             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6856         if (const auto *PreInit =
6857                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6858           for (const auto *I : PreInit->decls()) {
6859             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6860               CGF.EmitVarDecl(cast<VarDecl>(*I));
6861             } else {
6862               CodeGenFunction::AutoVarEmission Emission =
6863                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6864               CGF.EmitAutoVarCleanups(Emission);
6865             }
6866           }
6867         }
6868         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6869         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6870                                                /*isSigned=*/false);
6871         if (DefaultThreadLimitVal)
6872           NumThreads = CGF.Builder.CreateSelect(
6873               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6874               DefaultThreadLimitVal, NumThreads);
6875       } else {
6876         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6877                                            : CGF.Builder.getInt32(0);
6878       }
6879       // Process condition of the if clause.
6880       if (CondVal) {
6881         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6882                                               CGF.Builder.getInt32(1));
6883       }
6884       return NumThreads;
6885     }
6886     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6887       return CGF.Builder.getInt32(1);
6888     return DefaultThreadLimitVal;
6889   }
6890   return DefaultThreadLimitVal ? DefaultThreadLimitVal
6891                                : CGF.Builder.getInt32(0);
6892 }
6893 
6894 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6895     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6896     int32_t &DefaultVal) {
6897   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6898   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6899          "Expected target-based executable directive.");
6900 
6901   switch (DirectiveKind) {
6902   case OMPD_target:
6903     // Teams have no clause thread_limit
6904     return nullptr;
6905   case OMPD_target_teams:
6906   case OMPD_target_teams_distribute:
6907     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6908       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6909       const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
6910       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6911         if (auto Constant =
6912                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6913           DefaultVal = Constant->getExtValue();
6914       return ThreadLimit;
6915     }
6916     return nullptr;
6917   case OMPD_target_parallel:
6918   case OMPD_target_parallel_for:
6919   case OMPD_target_parallel_for_simd:
6920   case OMPD_target_teams_distribute_parallel_for:
6921   case OMPD_target_teams_distribute_parallel_for_simd: {
6922     Expr *ThreadLimit = nullptr;
6923     Expr *NumThreads = nullptr;
6924     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6925       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6926       ThreadLimit = ThreadLimitClause->getThreadLimit();
6927       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6928         if (auto Constant =
6929                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6930           DefaultVal = Constant->getExtValue();
6931     }
6932     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6933       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6934       NumThreads = NumThreadsClause->getNumThreads();
6935       if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
6936         if (auto Constant =
6937                 NumThreads->getIntegerConstantExpr(CGF.getContext())) {
6938           if (Constant->getExtValue() < DefaultVal) {
6939             DefaultVal = Constant->getExtValue();
6940             ThreadLimit = NumThreads;
6941           }
6942         }
6943       }
6944     }
6945     return ThreadLimit;
6946   }
6947   case OMPD_target_teams_distribute_simd:
6948   case OMPD_target_simd:
6949     DefaultVal = 1;
6950     return nullptr;
6951   case OMPD_parallel:
6952   case OMPD_for:
6953   case OMPD_parallel_for:
6954   case OMPD_parallel_master:
6955   case OMPD_parallel_sections:
6956   case OMPD_for_simd:
6957   case OMPD_parallel_for_simd:
6958   case OMPD_cancel:
6959   case OMPD_cancellation_point:
6960   case OMPD_ordered:
6961   case OMPD_threadprivate:
6962   case OMPD_allocate:
6963   case OMPD_task:
6964   case OMPD_simd:
6965   case OMPD_tile:
6966   case OMPD_unroll:
6967   case OMPD_sections:
6968   case OMPD_section:
6969   case OMPD_single:
6970   case OMPD_master:
6971   case OMPD_critical:
6972   case OMPD_taskyield:
6973   case OMPD_barrier:
6974   case OMPD_taskwait:
6975   case OMPD_taskgroup:
6976   case OMPD_atomic:
6977   case OMPD_flush:
6978   case OMPD_depobj:
6979   case OMPD_scan:
6980   case OMPD_teams:
6981   case OMPD_target_data:
6982   case OMPD_target_exit_data:
6983   case OMPD_target_enter_data:
6984   case OMPD_distribute:
6985   case OMPD_distribute_simd:
6986   case OMPD_distribute_parallel_for:
6987   case OMPD_distribute_parallel_for_simd:
6988   case OMPD_teams_distribute:
6989   case OMPD_teams_distribute_simd:
6990   case OMPD_teams_distribute_parallel_for:
6991   case OMPD_teams_distribute_parallel_for_simd:
6992   case OMPD_target_update:
6993   case OMPD_declare_simd:
6994   case OMPD_declare_variant:
6995   case OMPD_begin_declare_variant:
6996   case OMPD_end_declare_variant:
6997   case OMPD_declare_target:
6998   case OMPD_end_declare_target:
6999   case OMPD_declare_reduction:
7000   case OMPD_declare_mapper:
7001   case OMPD_taskloop:
7002   case OMPD_taskloop_simd:
7003   case OMPD_master_taskloop:
7004   case OMPD_master_taskloop_simd:
7005   case OMPD_parallel_master_taskloop:
7006   case OMPD_parallel_master_taskloop_simd:
7007   case OMPD_requires:
7008   case OMPD_unknown:
7009     break;
7010   default:
7011     break;
7012   }
7013   llvm_unreachable("Unsupported directive kind.");
7014 }
7015 
/// Emit the i32 thread count for the target-based directive \p D, combining
/// the thread_limit, num_threads and 'if' clauses found on \p D itself or, for
/// 'target' and 'target teams', on directives nested directly in its body.
///
/// Must only be called when compiling for the host, and \p D must be a target
/// execution directive (both asserted).
/// \returns The constant 1 for the simd-only forms and when the applicable
/// 'if' clause folds to false; otherwise the emitted value, with the constant
/// 0 standing in when no clause supplies one.
/// NOTE(review): the 'target teams distribute' case returns getNumThreads()'s
/// result unchecked, and that helper can return null for a nested directive
/// that is neither parallel nor simd - presumably not reachable for a
/// loop-associated directive; confirm.
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    // ThreadLimitVal is still null here, so getNumThreads() only comes back
    // null when the nested directive is neither parallel nor simd.
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Evaluate a thread_limit clause of the nested directive with the
      // captured-statement info of the target region active.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // A teams directive without distribute: step one level further in and
      // continue with the directive nested inside it (Dir may become null).
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // A distribute directive without simd: look inside it for a nested
      // directive, now bounded by the thread limit computed above.
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    // Nothing usable directly inside the teams region; if the nested directive
    // is a plain 'distribute', try once more one level further in.
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      // Use the first 'if' clause that has no name modifier or names
      // 'parallel'.
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false condition: a single thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // With both clauses present take the unsigned minimum of the two;
      // otherwise num_threads alone is the bound.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // None of the kinds below produce a value; they all break out of the
  // switch and reach the llvm_unreachable that follows it.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7234 
7235 namespace {
7236 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7237 
7238 // Utility to handle information from clauses associated with a given
7239 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7240 // It provides a convenient interface to obtain the information and generate
7241 // code for that information.
7242 class MappableExprsHandler {
7243 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    // Note: 0x800 is reserved for compatibility with XLC and therefore has no
    // enumerator here.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// Increment and decrement a separate reference counter so that the data
    /// cannot be unmapped within the associated region.  Thus, this flag is
    /// intended to be used on 'target' and 'target data' directives because
    /// they are inherently structured.  It is not intended to be used on
    /// 'target enter data' and 'target exit data' directives because they are
    /// inherently dynamic.
    /// This is an OpenMP extension for the sake of OpenACC support.
    OMP_MAP_OMPX_HOLD = 0x2000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7299 
7300   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7301   static unsigned getFlagMemberOffset() {
7302     unsigned Offset = 0;
7303     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7304          Remain = Remain >> 1)
7305       Offset++;
7306     return Offset;
7307   }
7308 
7309   /// Class that holds debugging information for a data mapping to be passed to
7310   /// the runtime library.
7311   class MappingExprInfo {
7312     /// The variable declaration used for the data mapping.
7313     const ValueDecl *MapDecl = nullptr;
7314     /// The original expression used in the map clause, or null if there is
7315     /// none.
7316     const Expr *MapExpr = nullptr;
7317 
7318   public:
7319     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7320         : MapDecl(MapDecl), MapExpr(MapExpr) {}
7321 
7322     const ValueDecl *getMapDecl() const { return MapDecl; }
7323     const Expr *getMapExpr() const { return MapExpr; }
7324   };
7325 
7326   /// Class that associates information with a base pointer to be passed to the
7327   /// runtime library.
7328   class BasePointerInfo {
7329     /// The base pointer.
7330     llvm::Value *Ptr = nullptr;
7331     /// The base declaration that refers to this device pointer, or null if
7332     /// there is none.
7333     const ValueDecl *DevPtrDecl = nullptr;
7334 
7335   public:
7336     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7337         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7338     llvm::Value *operator*() const { return Ptr; }
7339     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7340     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7341   };
7342 
7343   using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
7344   using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7345   using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7346   using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7347   using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
7348   using MapDimArrayTy = SmallVector<uint64_t, 4>;
7349   using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7350 
7351   /// This structure contains combined information generated for mappable
7352   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7353   /// mappers, and non-contiguous information.
7354   struct MapCombinedInfoTy {
7355     struct StructNonContiguousInfo {
7356       bool IsNonContiguous = false;
7357       MapDimArrayTy Dims;
7358       MapNonContiguousArrayTy Offsets;
7359       MapNonContiguousArrayTy Counts;
7360       MapNonContiguousArrayTy Strides;
7361     };
7362     MapExprsArrayTy Exprs;
7363     MapBaseValuesArrayTy BasePointers;
7364     MapValuesArrayTy Pointers;
7365     MapValuesArrayTy Sizes;
7366     MapFlagsArrayTy Types;
7367     MapMappersArrayTy Mappers;
7368     StructNonContiguousInfo NonContigInfo;
7369 
7370     /// Append arrays in \a CurInfo.
7371     void append(MapCombinedInfoTy &CurInfo) {
7372       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7373       BasePointers.append(CurInfo.BasePointers.begin(),
7374                           CurInfo.BasePointers.end());
7375       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7376       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7377       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7378       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7379       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7380                                  CurInfo.NonContigInfo.Dims.end());
7381       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7382                                     CurInfo.NonContigInfo.Offsets.end());
7383       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7384                                    CurInfo.NonContigInfo.Counts.end());
7385       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7386                                     CurInfo.NonContigInfo.Strides.end());
7387     }
7388   };
7389 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    MapCombinedInfoTy PreliminaryMapData;
    // Lowest mapped element as (field index, address); the address starts out
    // invalid.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    // Highest mapped element as (field index, address); the address starts
    // out invalid.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    // Both addresses start out invalid.
    // NOTE(review): presumably the struct's base address and a lower bound
    // used for array sections - confirm against the code that fills them in.
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };
7405 
7406 private:
  /// Description of a single mappable-expression entry: its component list,
  /// map type, map/motion modifiers and the flags that accompany it.
  struct MapInfo {
    /// Components of the mapped expression. This is a non-owning reference to
    /// storage held elsewhere.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// Map type of the entry; OMPC_MAP_unknown until explicitly set.
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Map-type modifiers attached to the entry (non-owning view).
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// Motion modifiers attached to the entry (non-owning view).
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    /// NOTE(review): presumably requests that the device pointer be returned
    /// for this entry -- confirm against the users of this flag.
    bool ReturnDevicePointer = false;
    /// Whether the entry is implicit.
    bool IsImplicit = false;
    /// Mapper declaration associated with the entry, or null if there is none.
    const ValueDecl *Mapper = nullptr;
    /// Expression associated with the entry, or null if there is none.
    const Expr *VarRef = nullptr;
    /// NOTE(review): presumably distinguishes device-address entries from
    /// device-pointer ones -- confirm against the users of this flag.
    bool ForDeviceAddr = false;

    MapInfo() = default;
    /// Member-wise constructor; \p Mapper, \p VarRef and \p ForDeviceAddr are
    /// optional and default to null/null/false.
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
7433 
7434   /// If use_device_ptr or use_device_addr is used on a decl which is a struct
7435   /// member and there is no map information about it, then emission of that
7436   /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// Expression of the deferred entry.
    const Expr *IE = nullptr;
    /// Declaration the deferred entry refers to.
    const ValueDecl *VD = nullptr;
    /// NOTE(review): presumably true for use_device_addr entries and false
    /// for use_device_ptr ones -- confirm against the code creating entries.
    bool ForDeviceAddr = false;

    /// Member-wise constructor; all three values must be supplied.
    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7446 
  /// The target directive from which the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for. Held by reference, so the
  /// referenced CodeGenFunction must outlive this object.
  CodeGenFunction &CGF;

  /// Set of all firstprivate variables in the current directive.
  /// The bool value is true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between lambda declarations and the map clause associated with them
  /// (the clause gives access to the map type).
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7470 
7471   llvm::Value *getExprTypeSize(const Expr *E) const {
7472     QualType ExprTy = E->getType().getCanonicalType();
7473 
7474     // Calculate the size for array shaping expression.
7475     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7476       llvm::Value *Size =
7477           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7478       for (const Expr *SE : OAE->getDimensions()) {
7479         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7480         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7481                                       CGF.getContext().getSizeType(),
7482                                       SE->getExprLoc());
7483         Size = CGF.Builder.CreateNUWMul(Size, Sz);
7484       }
7485       return Size;
7486     }
7487 
7488     // Reference types are ignored for mapping purposes.
7489     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7490       ExprTy = RefTy->getPointeeType().getCanonicalType();
7491 
7492     // Given that an array section is considered a built-in type, we need to
7493     // do the calculation based on the length of the section instead of relying
7494     // on CGF.getTypeSize(E->getType()).
7495     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7496       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7497                             OAE->getBase()->IgnoreParenImpCasts())
7498                             .getCanonicalType();
7499 
7500       // If there is no length associated with the expression and lower bound is
7501       // not specified too, that means we are using the whole length of the
7502       // base.
7503       if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7504           !OAE->getLowerBound())
7505         return CGF.getTypeSize(BaseTy);
7506 
7507       llvm::Value *ElemSize;
7508       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7509         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7510       } else {
7511         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7512         assert(ATy && "Expecting array type if not a pointer type.");
7513         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7514       }
7515 
7516       // If we don't have a length at this point, that is because we have an
7517       // array section with a single element.
7518       if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7519         return ElemSize;
7520 
7521       if (const Expr *LenExpr = OAE->getLength()) {
7522         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7523         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7524                                              CGF.getContext().getSizeType(),
7525                                              LenExpr->getExprLoc());
7526         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7527       }
7528       assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7529              OAE->getLowerBound() && "expected array_section[lb:].");
7530       // Size = sizetype - lb * elemtype;
7531       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7532       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7533       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7534                                        CGF.getContext().getSizeType(),
7535                                        OAE->getLowerBound()->getExprLoc());
7536       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7537       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7538       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7539       LengthVal = CGF.Builder.CreateSelect(
7540           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7541       return LengthVal;
7542     }
7543     return CGF.getTypeSize(ExprTy);
7544   }
7545 
7546   /// Return the corresponding bits for a given map clause modifier. Add
7547   /// a flag marking the map as a pointer if requested. Add a flag marking the
7548   /// map as the first one of a series of maps that relate to the same map
7549   /// expression.
7550   OpenMPOffloadMappingFlags getMapTypeBits(
7551       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7552       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7553       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7554     OpenMPOffloadMappingFlags Bits =
7555         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7556     switch (MapType) {
7557     case OMPC_MAP_alloc:
7558     case OMPC_MAP_release:
7559       // alloc and release is the default behavior in the runtime library,  i.e.
7560       // if we don't pass any bits alloc/release that is what the runtime is
7561       // going to do. Therefore, we don't need to signal anything for these two
7562       // type modifiers.
7563       break;
7564     case OMPC_MAP_to:
7565       Bits |= OMP_MAP_TO;
7566       break;
7567     case OMPC_MAP_from:
7568       Bits |= OMP_MAP_FROM;
7569       break;
7570     case OMPC_MAP_tofrom:
7571       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7572       break;
7573     case OMPC_MAP_delete:
7574       Bits |= OMP_MAP_DELETE;
7575       break;
7576     case OMPC_MAP_unknown:
7577       llvm_unreachable("Unexpected map type!");
7578     }
7579     if (AddPtrFlag)
7580       Bits |= OMP_MAP_PTR_AND_OBJ;
7581     if (AddIsTargetParamFlag)
7582       Bits |= OMP_MAP_TARGET_PARAM;
7583     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7584       Bits |= OMP_MAP_ALWAYS;
7585     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7586       Bits |= OMP_MAP_CLOSE;
7587     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7588         llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7589       Bits |= OMP_MAP_PRESENT;
7590     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7591       Bits |= OMP_MAP_OMPX_HOLD;
7592     if (IsNonContiguous)
7593       Bits |= OMP_MAP_NON_CONTIG;
7594     return Bits;
7595   }
7596 
7597   /// Return true if the provided expression is a final array section. A
7598   /// final array section, is one whose length can't be proved to be one.
7599   bool isFinalArraySectionExpression(const Expr *E) const {
7600     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7601 
7602     // It is not an array section and therefore not a unity-size one.
7603     if (!OASE)
7604       return false;
7605 
7606     // An array section with no colon always refer to a single element.
7607     if (OASE->getColonLocFirst().isInvalid())
7608       return false;
7609 
7610     const Expr *Length = OASE->getLength();
7611 
7612     // If we don't have a length we have to check if the array has size 1
7613     // for this dimension. Also, we should always expect a length if the
7614     // base type is pointer.
7615     if (!Length) {
7616       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7617                              OASE->getBase()->IgnoreParenImpCasts())
7618                              .getCanonicalType();
7619       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7620         return ATy->getSize().getSExtValue() != 1;
7621       // If we don't have a constant dimension length, we have to consider
7622       // the current section as having any size, so it is not necessarily
7623       // unitary. If it happen to be unity size, that's user fault.
7624       return true;
7625     }
7626 
7627     // Check if the length evaluates to 1.
7628     Expr::EvalResult Result;
7629     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7630       return true; // Can have more that size 1.
7631 
7632     llvm::APSInt ConstLength = Result.Val.getInt();
7633     return ConstLength.getSExtValue() != 1;
7634   }
7635 
7636   /// Generate the base pointers, section pointers, sizes, map type bits, and
7637   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7638   /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
7641   void generateInfoForComponentList(
7642       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7643       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7644       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7645       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7646       bool IsFirstComponentList, bool IsImplicit,
7647       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7648       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7649       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7650           OverlappedElements = llvm::None) const {
7651     // The following summarizes what has to be generated for each map and the
7652     // types below. The generated information is expressed in this order:
7653     // base pointer, section pointer, size, flags
7654     // (to add to the ones that come from the map type and modifier).
7655     //
7656     // double d;
7657     // int i[100];
7658     // float *p;
7659     //
7660     // struct S1 {
7661     //   int i;
7662     //   float f[50];
7663     // }
7664     // struct S2 {
7665     //   int i;
7666     //   float f[50];
7667     //   S1 s;
7668     //   double *p;
7669     //   struct S2 *ps;
7670     //   int &ref;
7671     // }
7672     // S2 s;
7673     // S2 *ps;
7674     //
7675     // map(d)
7676     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7677     //
7678     // map(i)
7679     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7680     //
7681     // map(i[1:23])
7682     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7683     //
7684     // map(p)
7685     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7686     //
7687     // map(p[1:24])
7688     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7689     // in unified shared memory mode or for local pointers
7690     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7691     //
7692     // map(s)
7693     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7694     //
7695     // map(s.i)
7696     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7697     //
7698     // map(s.s.f)
7699     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7700     //
7701     // map(s.p)
7702     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7703     //
7704     // map(to: s.p[:22])
7705     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7706     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7707     // &(s.p), &(s.p[0]), 22*sizeof(double),
7708     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7709     // (*) alloc space for struct members, only this is a target parameter
7710     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7711     //      optimizes this entry out, same in the examples below)
7712     // (***) map the pointee (map: to)
7713     //
7714     // map(to: s.ref)
7715     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7716     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7717     // (*) alloc space for struct members, only this is a target parameter
7718     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7719     //      optimizes this entry out, same in the examples below)
7720     // (***) map the pointee (map: to)
7721     //
7722     // map(s.ps)
7723     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7724     //
7725     // map(from: s.ps->s.i)
7726     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7727     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7728     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7729     //
7730     // map(to: s.ps->ps)
7731     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7732     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7733     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7734     //
7735     // map(s.ps->ps->ps)
7736     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7737     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7738     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7739     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7740     //
7741     // map(to: s.ps->ps->s.f[:22])
7742     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7743     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7744     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7745     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7746     //
7747     // map(ps)
7748     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7749     //
7750     // map(ps->i)
7751     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7752     //
7753     // map(ps->s.f)
7754     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7755     //
7756     // map(from: ps->p)
7757     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7758     //
7759     // map(to: ps->p[:22])
7760     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7761     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7762     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7763     //
7764     // map(ps->ps)
7765     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7766     //
7767     // map(from: ps->ps->s.i)
7768     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7769     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7770     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7771     //
7772     // map(from: ps->ps->ps)
7773     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7774     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7775     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7776     //
7777     // map(ps->ps->ps->ps)
7778     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7779     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7780     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7781     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7782     //
7783     // map(to: ps->ps->ps->s.f[:22])
7784     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7785     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7786     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7787     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7788     //
7789     // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
7792     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7793     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7794     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7795     // (*) allocate contiguous space needed to fit all mapped members even if
7796     //     we allocate space for members not mapped (in this example,
7797     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7798     //     them as well because they fall between &s.f[0] and &s.p)
7799     //
7800     // map(from: s.f[:22]) map(to: ps->p[:33])
7801     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7802     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7803     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7804     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7805     // (*) the struct this entry pertains to is the 2nd element in the list of
7806     //     arguments, hence MEMBER_OF(2)
7807     //
7808     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7809     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7810     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7811     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7812     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7813     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7814     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7815     // (*) the struct this entry pertains to is the 4th element in the list
7816     //     of arguments, hence MEMBER_OF(4)
7817 
7818     // Track if the map information being generated is the first for a capture.
7819     bool IsCaptureFirstInfo = IsFirstComponentList;
7820     // When the variable is on a declare target link or in a to clause with
7821     // unified memory, a reference is needed to hold the host/device address
7822     // of the variable.
7823     bool RequiresReference = false;
7824 
7825     // Scan the components from the base to the complete expression.
7826     auto CI = Components.rbegin();
7827     auto CE = Components.rend();
7828     auto I = CI;
7829 
7830     // Track if the map information being generated is the first for a list of
7831     // components.
7832     bool IsExpressionFirstInfo = true;
7833     bool FirstPointerInComplexData = false;
7834     Address BP = Address::invalid();
7835     const Expr *AssocExpr = I->getAssociatedExpression();
7836     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7837     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7838     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7839 
7840     if (isa<MemberExpr>(AssocExpr)) {
7841       // The base is the 'this' pointer. The content of the pointer is going
7842       // to be the base of the field being mapped.
7843       BP = CGF.LoadCXXThisAddress();
7844     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7845                (OASE &&
7846                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7847       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7848     } else if (OAShE &&
7849                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7850       BP = Address(
7851           CGF.EmitScalarExpr(OAShE->getBase()),
7852           CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7853           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7854     } else {
7855       // The base is the reference to the variable.
7856       // BP = &Var.
7857       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7858       if (const auto *VD =
7859               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7860         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7861                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7862           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7863               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7864                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7865             RequiresReference = true;
7866             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7867           }
7868         }
7869       }
7870 
7871       // If the variable is a pointer and is being dereferenced (i.e. is not
7872       // the last component), the base has to be the pointer itself, not its
7873       // reference. References are ignored for mapping purposes.
7874       QualType Ty =
7875           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7876       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7877         // No need to generate individual map information for the pointer, it
7878         // can be associated with the combined storage if shared memory mode is
7879         // active or the base declaration is not global variable.
7880         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7881         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7882             !VD || VD->hasLocalStorage())
7883           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7884         else
7885           FirstPointerInComplexData = true;
7886         ++I;
7887       }
7888     }
7889 
7890     // Track whether a component of the list should be marked as MEMBER_OF some
7891     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7892     // in a component list should be marked as MEMBER_OF, all subsequent entries
7893     // do not belong to the base struct. E.g.
7894     // struct S2 s;
7895     // s.ps->ps->ps->f[:]
7896     //   (1) (2) (3) (4)
7897     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7898     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7899     // is the pointee of ps(2) which is not member of struct s, so it should not
7900     // be marked as such (it is still PTR_AND_OBJ).
7901     // The variable is initialized to false so that PTR_AND_OBJ entries which
7902     // are not struct members are not considered (e.g. array of pointers to
7903     // data).
7904     bool ShouldBeMemberOf = false;
7905 
7906     // Variable keeping track of whether or not we have encountered a component
7907     // in the component list which is a member expression. Useful when we have a
7908     // pointer or a final array section, in which case it is the previous
7909     // component in the list which tells us whether we have a member expression.
7910     // E.g. X.f[:]
7911     // While processing the final array section "[:]" it is "f" which tells us
7912     // whether we are dealing with a member of a declared struct.
7913     const MemberExpr *EncounteredME = nullptr;
7914 
7915     // Track for the total number of dimension. Start from one for the dummy
7916     // dimension.
7917     uint64_t DimSize = 1;
7918 
7919     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7920     bool IsPrevMemberReference = false;
7921 
7922     for (; I != CE; ++I) {
7923       // If the current component is member of a struct (parent struct) mark it.
7924       if (!EncounteredME) {
7925         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7926         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7927         // as MEMBER_OF the parent struct.
7928         if (EncounteredME) {
7929           ShouldBeMemberOf = true;
7930           // Do not emit as complex pointer if this is actually not array-like
7931           // expression.
7932           if (FirstPointerInComplexData) {
7933             QualType Ty = std::prev(I)
7934                               ->getAssociatedDeclaration()
7935                               ->getType()
7936                               .getNonReferenceType();
7937             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7938             FirstPointerInComplexData = false;
7939           }
7940         }
7941       }
7942 
7943       auto Next = std::next(I);
7944 
7945       // We need to generate the addresses and sizes if this is the last
7946       // component, if the component is a pointer or if it is an array section
7947       // whose length can't be proved to be one. If this is a pointer, it
7948       // becomes the base address for the following components.
7949 
7950       // A final array section, is one whose length can't be proved to be one.
7951       // If the map item is non-contiguous then we don't treat any array section
7952       // as final array section.
7953       bool IsFinalArraySection =
7954           !IsNonContiguous &&
7955           isFinalArraySectionExpression(I->getAssociatedExpression());
7956 
7957       // If we have a declaration for the mapping use that, otherwise use
7958       // the base declaration of the map clause.
7959       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7960                                      ? I->getAssociatedDeclaration()
7961                                      : BaseDecl;
7962       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7963                                                : MapExpr;
7964 
7965       // Get information on whether the element is a pointer. Have to do a
7966       // special treatment for array sections given that they are built-in
7967       // types.
7968       const auto *OASE =
7969           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7970       const auto *OAShE =
7971           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7972       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7973       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7974       bool IsPointer =
7975           OAShE ||
7976           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7977                        .getCanonicalType()
7978                        ->isAnyPointerType()) ||
7979           I->getAssociatedExpression()->getType()->isAnyPointerType();
7980       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7981                                MapDecl &&
7982                                MapDecl->getType()->isLValueReferenceType();
7983       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
7984 
7985       if (OASE)
7986         ++DimSize;
7987 
7988       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7989           IsFinalArraySection) {
7990         // If this is not the last component, we expect the pointer to be
7991         // associated with an array expression or member expression.
7992         assert((Next == CE ||
7993                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7994                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7995                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7996                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7997                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7998                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7999                "Unexpected expression");
8000 
8001         Address LB = Address::invalid();
8002         Address LowestElem = Address::invalid();
8003         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
8004                                        const MemberExpr *E) {
8005           const Expr *BaseExpr = E->getBase();
8006           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
8007           // scalar.
8008           LValue BaseLV;
8009           if (E->isArrow()) {
8010             LValueBaseInfo BaseInfo;
8011             TBAAAccessInfo TBAAInfo;
8012             Address Addr =
8013                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
8014             QualType PtrTy = BaseExpr->getType()->getPointeeType();
8015             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
8016           } else {
8017             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
8018           }
8019           return BaseLV;
8020         };
8021         if (OAShE) {
8022           LowestElem = LB =
8023               Address(CGF.EmitScalarExpr(OAShE->getBase()),
8024                       CGF.ConvertTypeForMem(
8025                           OAShE->getBase()->getType()->getPointeeType()),
8026                       CGF.getContext().getTypeAlignInChars(
8027                           OAShE->getBase()->getType()));
8028         } else if (IsMemberReference) {
8029           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
8030           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8031           LowestElem = CGF.EmitLValueForFieldInitialization(
8032                               BaseLVal, cast<FieldDecl>(MapDecl))
8033                            .getAddress(CGF);
8034           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
8035                    .getAddress(CGF);
8036         } else {
8037           LowestElem = LB =
8038               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
8039                   .getAddress(CGF);
8040         }
8041 
8042         // If this component is a pointer inside the base struct then we don't
8043         // need to create any entry for it - it will be combined with the object
8044         // it is pointing to into a single PTR_AND_OBJ entry.
8045         bool IsMemberPointerOrAddr =
8046             EncounteredME &&
8047             (((IsPointer || ForDeviceAddr) &&
8048               I->getAssociatedExpression() == EncounteredME) ||
8049              (IsPrevMemberReference && !IsPointer) ||
8050              (IsMemberReference && Next != CE &&
8051               !Next->getAssociatedExpression()->getType()->isPointerType()));
8052         if (!OverlappedElements.empty() && Next == CE) {
8053           // Handle base element with the info for overlapped elements.
8054           assert(!PartialStruct.Base.isValid() && "The base element is set.");
8055           assert(!IsPointer &&
8056                  "Unexpected base element with the pointer type.");
8057           // Mark the whole struct as the struct that requires allocation on the
8058           // device.
8059           PartialStruct.LowestElem = {0, LowestElem};
8060           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8061               I->getAssociatedExpression()->getType());
8062           Address HB = CGF.Builder.CreateConstGEP(
8063               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8064                   LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
8065               TypeSize.getQuantity() - 1);
8066           PartialStruct.HighestElem = {
8067               std::numeric_limits<decltype(
8068                   PartialStruct.HighestElem.first)>::max(),
8069               HB};
8070           PartialStruct.Base = BP;
8071           PartialStruct.LB = LB;
8072           assert(
8073               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8074               "Overlapped elements must be used only once for the variable.");
8075           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8076           // Emit data for non-overlapped data.
8077           OpenMPOffloadMappingFlags Flags =
8078               OMP_MAP_MEMBER_OF |
8079               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8080                              /*AddPtrFlag=*/false,
8081                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8082           llvm::Value *Size = nullptr;
8083           // Do bitcopy of all non-overlapped structure elements.
8084           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8085                    Component : OverlappedElements) {
8086             Address ComponentLB = Address::invalid();
8087             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8088                  Component) {
8089               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8090                 const auto *FD = dyn_cast<FieldDecl>(VD);
8091                 if (FD && FD->getType()->isLValueReferenceType()) {
8092                   const auto *ME =
8093                       cast<MemberExpr>(MC.getAssociatedExpression());
8094                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8095                   ComponentLB =
8096                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
8097                           .getAddress(CGF);
8098                 } else {
8099                   ComponentLB =
8100                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
8101                           .getAddress(CGF);
8102                 }
8103                 Size = CGF.Builder.CreatePtrDiff(
8104                     CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
8105                     CGF.EmitCastToVoidPtr(LB.getPointer()));
8106                 break;
8107               }
8108             }
8109             assert(Size && "Failed to determine structure size");
8110             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8111             CombinedInfo.BasePointers.push_back(BP.getPointer());
8112             CombinedInfo.Pointers.push_back(LB.getPointer());
8113             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8114                 Size, CGF.Int64Ty, /*isSigned=*/true));
8115             CombinedInfo.Types.push_back(Flags);
8116             CombinedInfo.Mappers.push_back(nullptr);
8117             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8118                                                                       : 1);
8119             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
8120           }
8121           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8122           CombinedInfo.BasePointers.push_back(BP.getPointer());
8123           CombinedInfo.Pointers.push_back(LB.getPointer());
8124           Size = CGF.Builder.CreatePtrDiff(
8125               CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
8126               CGF.EmitCastToVoidPtr(LB.getPointer()));
8127           CombinedInfo.Sizes.push_back(
8128               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8129           CombinedInfo.Types.push_back(Flags);
8130           CombinedInfo.Mappers.push_back(nullptr);
8131           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8132                                                                     : 1);
8133           break;
8134         }
8135         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8136         if (!IsMemberPointerOrAddr ||
8137             (Next == CE && MapType != OMPC_MAP_unknown)) {
8138           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8139           CombinedInfo.BasePointers.push_back(BP.getPointer());
8140           CombinedInfo.Pointers.push_back(LB.getPointer());
8141           CombinedInfo.Sizes.push_back(
8142               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8143           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8144                                                                     : 1);
8145 
8146           // If Mapper is valid, the last component inherits the mapper.
8147           bool HasMapper = Mapper && Next == CE;
8148           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8149 
8150           // We need to add a pointer flag for each map that comes from the
8151           // same expression except for the first one. We also need to signal
8152           // this map is the first one that relates with the current capture
8153           // (there is a set of entries for each capture).
8154           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8155               MapType, MapModifiers, MotionModifiers, IsImplicit,
8156               !IsExpressionFirstInfo || RequiresReference ||
8157                   FirstPointerInComplexData || IsMemberReference,
8158               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8159 
8160           if (!IsExpressionFirstInfo || IsMemberReference) {
8161             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8162             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8163             if (IsPointer || (IsMemberReference && Next != CE))
8164               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
8165                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
8166 
8167             if (ShouldBeMemberOf) {
8168               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8169               // should be later updated with the correct value of MEMBER_OF.
8170               Flags |= OMP_MAP_MEMBER_OF;
8171               // From now on, all subsequent PTR_AND_OBJ entries should not be
8172               // marked as MEMBER_OF.
8173               ShouldBeMemberOf = false;
8174             }
8175           }
8176 
8177           CombinedInfo.Types.push_back(Flags);
8178         }
8179 
8180         // If we have encountered a member expression so far, keep track of the
8181         // mapped member. If the parent is "*this", then the value declaration
8182         // is nullptr.
8183         if (EncounteredME) {
8184           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8185           unsigned FieldIndex = FD->getFieldIndex();
8186 
8187           // Update info about the lowest and highest elements for this struct
8188           if (!PartialStruct.Base.isValid()) {
8189             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8190             if (IsFinalArraySection) {
8191               Address HB =
8192                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
8193                       .getAddress(CGF);
8194               PartialStruct.HighestElem = {FieldIndex, HB};
8195             } else {
8196               PartialStruct.HighestElem = {FieldIndex, LowestElem};
8197             }
8198             PartialStruct.Base = BP;
8199             PartialStruct.LB = BP;
8200           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8201             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8202           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8203             PartialStruct.HighestElem = {FieldIndex, LowestElem};
8204           }
8205         }
8206 
8207         // Need to emit combined struct for array sections.
8208         if (IsFinalArraySection || IsNonContiguous)
8209           PartialStruct.IsArraySection = true;
8210 
8211         // If we have a final array section, we are done with this expression.
8212         if (IsFinalArraySection)
8213           break;
8214 
8215         // The pointer becomes the base for the next element.
8216         if (Next != CE)
8217           BP = IsMemberReference ? LowestElem : LB;
8218 
8219         IsExpressionFirstInfo = false;
8220         IsCaptureFirstInfo = false;
8221         FirstPointerInComplexData = false;
8222         IsPrevMemberReference = IsMemberReference;
8223       } else if (FirstPointerInComplexData) {
8224         QualType Ty = Components.rbegin()
8225                           ->getAssociatedDeclaration()
8226                           ->getType()
8227                           .getNonReferenceType();
8228         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8229         FirstPointerInComplexData = false;
8230       }
8231     }
8232     // If ran into the whole component - allocate the space for the whole
8233     // record.
8234     if (!EncounteredME)
8235       PartialStruct.HasCompleteRecord = true;
8236 
8237     if (!IsNonContiguous)
8238       return;
8239 
8240     const ASTContext &Context = CGF.getContext();
8241 
8242     // For supporting stride in array section, we need to initialize the first
8243     // dimension size as 1, first offset as 0, and first count as 1
8244     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8245     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8246     MapValuesArrayTy CurStrides;
8247     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8248     uint64_t ElementTypeSize;
8249 
    // Collect Size information for each dimension and get the element size as
    // the first Stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
8253     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8254          Components) {
8255       const Expr *AssocExpr = Component.getAssociatedExpression();
8256       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8257 
8258       if (!OASE)
8259         continue;
8260 
8261       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8262       auto *CAT = Context.getAsConstantArrayType(Ty);
8263       auto *VAT = Context.getAsVariableArrayType(Ty);
8264 
8265       // We need all the dimension size except for the last dimension.
8266       assert((VAT || CAT || &Component == &*Components.begin()) &&
8267              "Should be either ConstantArray or VariableArray if not the "
8268              "first Component");
8269 
8270       // Get element size if CurStrides is empty.
8271       if (CurStrides.empty()) {
8272         const Type *ElementType = nullptr;
8273         if (CAT)
8274           ElementType = CAT->getElementType().getTypePtr();
8275         else if (VAT)
8276           ElementType = VAT->getElementType().getTypePtr();
8277         else
8278           assert(&Component == &*Components.begin() &&
8279                  "Only expect pointer (non CAT or VAT) when this is the "
8280                  "first Component");
8281         // If ElementType is null, then it means the base is a pointer
8282         // (neither CAT nor VAT) and we'll attempt to get ElementType again
8283         // for next iteration.
8284         if (ElementType) {
8285           // For the case that having pointer as base, we need to remove one
8286           // level of indirection.
8287           if (&Component != &*Components.begin())
8288             ElementType = ElementType->getPointeeOrArrayElementType();
8289           ElementTypeSize =
8290               Context.getTypeSizeInChars(ElementType).getQuantity();
8291           CurStrides.push_back(
8292               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8293         }
8294       }
8295       // Get dimension value except for the last dimension since we don't need
8296       // it.
8297       if (DimSizes.size() < Components.size() - 1) {
8298         if (CAT)
8299           DimSizes.push_back(llvm::ConstantInt::get(
8300               CGF.Int64Ty, CAT->getSize().getZExtValue()));
8301         else if (VAT)
8302           DimSizes.push_back(CGF.Builder.CreateIntCast(
8303               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8304               /*IsSigned=*/false));
8305       }
8306     }
8307 
    // Skip the dummy dimension since we already have its information.
8309     auto *DI = DimSizes.begin() + 1;
8310     // Product of dimension.
8311     llvm::Value *DimProd =
8312         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8313 
8314     // Collect info for non-contiguous. Notice that offset, count, and stride
8315     // are only meaningful for array-section, so we insert a null for anything
8316     // other than array-section.
8317     // Also, the size of offset, count, and stride are not the same as
8318     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8319     // count, and stride are the same as the number of non-contiguous
8320     // declaration in target update to/from clause.
8321     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8322          Components) {
8323       const Expr *AssocExpr = Component.getAssociatedExpression();
8324 
8325       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8326         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8327             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8328             /*isSigned=*/false);
8329         CurOffsets.push_back(Offset);
8330         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8331         CurStrides.push_back(CurStrides.back());
8332         continue;
8333       }
8334 
8335       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8336 
8337       if (!OASE)
8338         continue;
8339 
8340       // Offset
8341       const Expr *OffsetExpr = OASE->getLowerBound();
8342       llvm::Value *Offset = nullptr;
8343       if (!OffsetExpr) {
8344         // If offset is absent, then we just set it to zero.
8345         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8346       } else {
8347         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8348                                            CGF.Int64Ty,
8349                                            /*isSigned=*/false);
8350       }
8351       CurOffsets.push_back(Offset);
8352 
8353       // Count
8354       const Expr *CountExpr = OASE->getLength();
8355       llvm::Value *Count = nullptr;
8356       if (!CountExpr) {
8357         // In Clang, once a high dimension is an array section, we construct all
8358         // the lower dimension as array section, however, for case like
8359         // arr[0:2][2], Clang construct the inner dimension as an array section
8360         // but it actually is not in an array section form according to spec.
8361         if (!OASE->getColonLocFirst().isValid() &&
8362             !OASE->getColonLocSecond().isValid()) {
8363           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8364         } else {
8365           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8366           // When the length is absent it defaults to ⌈(size −
8367           // lower-bound)/stride⌉, where size is the size of the array
8368           // dimension.
8369           const Expr *StrideExpr = OASE->getStride();
8370           llvm::Value *Stride =
8371               StrideExpr
8372                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8373                                               CGF.Int64Ty, /*isSigned=*/false)
8374                   : nullptr;
8375           if (Stride)
8376             Count = CGF.Builder.CreateUDiv(
8377                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8378           else
8379             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8380         }
8381       } else {
8382         Count = CGF.EmitScalarExpr(CountExpr);
8383       }
8384       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8385       CurCounts.push_back(Count);
8386 
8387       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8388       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8389       //              Offset      Count     Stride
8390       //    D0          0           1         4    (int)    <- dummy dimension
8391       //    D1          0           2         8    (2 * (1) * 4)
8392       //    D2          1           2         20   (1 * (1 * 5) * 4)
8393       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8394       const Expr *StrideExpr = OASE->getStride();
8395       llvm::Value *Stride =
8396           StrideExpr
8397               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8398                                           CGF.Int64Ty, /*isSigned=*/false)
8399               : nullptr;
8400       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8401       if (Stride)
8402         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8403       else
8404         CurStrides.push_back(DimProd);
8405       if (DI != DimSizes.end())
8406         ++DI;
8407     }
8408 
8409     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8410     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8411     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8412   }
8413 
8414   /// Return the adjusted map modifiers if the declaration a capture refers to
8415   /// appears in a first-private clause. This is expected to be used only with
8416   /// directives that start with 'target'.
8417   MappableExprsHandler::OpenMPOffloadMappingFlags
8418   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8419     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8420 
8421     // A first private variable captured by reference will use only the
8422     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8423     // declaration is known as first-private in this handler.
8424     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8425       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8426         return MappableExprsHandler::OMP_MAP_TO |
8427                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8428       return MappableExprsHandler::OMP_MAP_PRIVATE |
8429              MappableExprsHandler::OMP_MAP_TO;
8430     }
8431     auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8432     if (I != LambdasMap.end())
8433       // for map(to: lambda): using user specified map type.
8434       return getMapTypeBits(
8435           I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8436           /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
8437           /*AddPtrFlag=*/false,
8438           /*AddIsTargetParamFlag=*/false,
8439           /*isNonContiguous=*/false);
8440     return MappableExprsHandler::OMP_MAP_TO |
8441            MappableExprsHandler::OMP_MAP_FROM;
8442   }
8443 
8444   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8445     // Rotate by getFlagMemberOffset() bits.
8446     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8447                                                   << getFlagMemberOffset());
8448   }
8449 
8450   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8451                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8452     // If the entry is PTR_AND_OBJ but has not been marked with the special
8453     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8454     // marked as MEMBER_OF.
8455     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8456         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8457       return;
8458 
8459     // Reset the placeholder value to prepare the flag for the assignment of the
8460     // proper MEMBER_OF value.
8461     Flags &= ~OMP_MAP_MEMBER_OF;
8462     Flags |= MemberOfFlag;
8463   }
8464 
8465   void getPlainLayout(const CXXRecordDecl *RD,
8466                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8467                       bool AsBase) const {
8468     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8469 
8470     llvm::StructType *St =
8471         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8472 
8473     unsigned NumElements = St->getNumElements();
8474     llvm::SmallVector<
8475         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8476         RecordLayout(NumElements);
8477 
8478     // Fill bases.
8479     for (const auto &I : RD->bases()) {
8480       if (I.isVirtual())
8481         continue;
8482       const auto *Base = I.getType()->getAsCXXRecordDecl();
8483       // Ignore empty bases.
8484       if (Base->isEmpty() || CGF.getContext()
8485                                  .getASTRecordLayout(Base)
8486                                  .getNonVirtualSize()
8487                                  .isZero())
8488         continue;
8489 
8490       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8491       RecordLayout[FieldIndex] = Base;
8492     }
8493     // Fill in virtual bases.
8494     for (const auto &I : RD->vbases()) {
8495       const auto *Base = I.getType()->getAsCXXRecordDecl();
8496       // Ignore empty bases.
8497       if (Base->isEmpty())
8498         continue;
8499       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8500       if (RecordLayout[FieldIndex])
8501         continue;
8502       RecordLayout[FieldIndex] = Base;
8503     }
8504     // Fill in all the fields.
8505     assert(!RD->isUnion() && "Unexpected union.");
8506     for (const auto *Field : RD->fields()) {
8507       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8508       // will fill in later.)
8509       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8510         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8511         RecordLayout[FieldIndex] = Field;
8512       }
8513     }
8514     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8515              &Data : RecordLayout) {
8516       if (Data.isNull())
8517         continue;
8518       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8519         getPlainLayout(Base, Layout, /*AsBase=*/true);
8520       else
8521         Layout.push_back(Data.get<const FieldDecl *>());
8522     }
8523   }
8524 
8525   /// Generate all the base pointers, section pointers, sizes, map types, and
8526   /// mappers for the extracted mappable expressions (all included in \a
8527   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8528   /// pair of the relevant declaration and index where it occurs is appended to
8529   /// the device pointers info array.
8530   void generateAllInfoForClauses(
8531       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8532       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8533           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8534     // We have to process the component lists that relate with the same
8535     // declaration in a single chunk so that we can generate the map flags
8536     // correctly. Therefore, we organize all lists in a map.
8537     enum MapKind { Present, Allocs, Other, Total };
8538     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8539                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8540         Info;
8541 
8542     // Helper function to fill the information map for the different supported
8543     // clauses.
8544     auto &&InfoGen =
8545         [&Info, &SkipVarSet](
8546             const ValueDecl *D, MapKind Kind,
8547             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8548             OpenMPMapClauseKind MapType,
8549             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8550             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8551             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8552             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8553           if (SkipVarSet.contains(D))
8554             return;
8555           auto It = Info.find(D);
8556           if (It == Info.end())
8557             It = Info
8558                      .insert(std::make_pair(
8559                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8560                      .first;
8561           It->second[Kind].emplace_back(
8562               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8563               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8564         };
8565 
8566     for (const auto *Cl : Clauses) {
8567       const auto *C = dyn_cast<OMPMapClause>(Cl);
8568       if (!C)
8569         continue;
8570       MapKind Kind = Other;
8571       if (llvm::is_contained(C->getMapTypeModifiers(),
8572                              OMPC_MAP_MODIFIER_present))
8573         Kind = Present;
8574       else if (C->getMapType() == OMPC_MAP_alloc)
8575         Kind = Allocs;
8576       const auto *EI = C->getVarRefs().begin();
8577       for (const auto L : C->component_lists()) {
8578         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8579         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8580                 C->getMapTypeModifiers(), llvm::None,
8581                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8582                 E);
8583         ++EI;
8584       }
8585     }
8586     for (const auto *Cl : Clauses) {
8587       const auto *C = dyn_cast<OMPToClause>(Cl);
8588       if (!C)
8589         continue;
8590       MapKind Kind = Other;
8591       if (llvm::is_contained(C->getMotionModifiers(),
8592                              OMPC_MOTION_MODIFIER_present))
8593         Kind = Present;
8594       const auto *EI = C->getVarRefs().begin();
8595       for (const auto L : C->component_lists()) {
8596         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8597                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8598                 C->isImplicit(), std::get<2>(L), *EI);
8599         ++EI;
8600       }
8601     }
8602     for (const auto *Cl : Clauses) {
8603       const auto *C = dyn_cast<OMPFromClause>(Cl);
8604       if (!C)
8605         continue;
8606       MapKind Kind = Other;
8607       if (llvm::is_contained(C->getMotionModifiers(),
8608                              OMPC_MOTION_MODIFIER_present))
8609         Kind = Present;
8610       const auto *EI = C->getVarRefs().begin();
8611       for (const auto L : C->component_lists()) {
8612         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8613                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8614                 C->isImplicit(), std::get<2>(L), *EI);
8615         ++EI;
8616       }
8617     }
8618 
8619     // Look at the use_device_ptr clause information and mark the existing map
8620     // entries as such. If there is no map information for an entry in the
8621     // use_device_ptr list, we create one with map type 'alloc' and zero size
8622     // section. It is the user fault if that was not mapped before. If there is
8623     // no map information and the pointer is a struct member, then we defer the
8624     // emission of that entry until the whole struct has been processed.
8625     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8626                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8627         DeferredInfo;
8628     MapCombinedInfoTy UseDevicePtrCombinedInfo;
8629 
8630     for (const auto *Cl : Clauses) {
8631       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8632       if (!C)
8633         continue;
8634       for (const auto L : C->component_lists()) {
8635         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8636             std::get<1>(L);
8637         assert(!Components.empty() &&
8638                "Not expecting empty list of components!");
8639         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8640         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8641         const Expr *IE = Components.back().getAssociatedExpression();
8642         // If the first component is a member expression, we have to look into
8643         // 'this', which maps to null in the map of map information. Otherwise
8644         // look directly for the information.
8645         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8646 
8647         // We potentially have map information for this declaration already.
8648         // Look for the first set of components that refer to it.
8649         if (It != Info.end()) {
8650           bool Found = false;
8651           for (auto &Data : It->second) {
8652             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8653               return MI.Components.back().getAssociatedDeclaration() == VD;
8654             });
8655             // If we found a map entry, signal that the pointer has to be
8656             // returned and move on to the next declaration. Exclude cases where
8657             // the base pointer is mapped as array subscript, array section or
8658             // array shaping. The base address is passed as a pointer to base in
8659             // this case and cannot be used as a base for use_device_ptr list
8660             // item.
8661             if (CI != Data.end()) {
8662               auto PrevCI = std::next(CI->Components.rbegin());
8663               const auto *VarD = dyn_cast<VarDecl>(VD);
8664               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8665                   isa<MemberExpr>(IE) ||
8666                   !VD->getType().getNonReferenceType()->isPointerType() ||
8667                   PrevCI == CI->Components.rend() ||
8668                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8669                   VarD->hasLocalStorage()) {
8670                 CI->ReturnDevicePointer = true;
8671                 Found = true;
8672                 break;
8673               }
8674             }
8675           }
8676           if (Found)
8677             continue;
8678         }
8679 
8680         // We didn't find any match in our map information - generate a zero
8681         // size array section - if the pointer is a struct member we defer this
8682         // action until the whole struct has been processed.
8683         if (isa<MemberExpr>(IE)) {
8684           // Insert the pointer into Info to be processed by
8685           // generateInfoForComponentList. Because it is a member pointer
8686           // without a pointee, no entry will be generated for it, therefore
8687           // we need to generate one after the whole struct has been processed.
8688           // Nonetheless, generateInfoForComponentList must be called to take
8689           // the pointer into account for the calculation of the range of the
8690           // partial struct.
8691           InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8692                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8693                   nullptr);
8694           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8695         } else {
8696           llvm::Value *Ptr =
8697               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8698           UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8699           UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8700           UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8701           UseDevicePtrCombinedInfo.Sizes.push_back(
8702               llvm::Constant::getNullValue(CGF.Int64Ty));
8703           UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8704           UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8705         }
8706       }
8707     }
8708 
8709     // Look at the use_device_addr clause information and mark the existing map
8710     // entries as such. If there is no map information for an entry in the
8711     // use_device_addr list, we create one with map type 'alloc' and zero size
8712     // section. It is the user fault if that was not mapped before. If there is
8713     // no map information and the pointer is a struct member, then we defer the
8714     // emission of that entry until the whole struct has been processed.
8715     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8716     for (const auto *Cl : Clauses) {
8717       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8718       if (!C)
8719         continue;
8720       for (const auto L : C->component_lists()) {
8721         assert(!std::get<1>(L).empty() &&
8722                "Not expecting empty list of components!");
8723         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8724         if (!Processed.insert(VD).second)
8725           continue;
8726         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8727         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8728         // If the first component is a member expression, we have to look into
8729         // 'this', which maps to null in the map of map information. Otherwise
8730         // look directly for the information.
8731         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8732 
8733         // We potentially have map information for this declaration already.
8734         // Look for the first set of components that refer to it.
8735         if (It != Info.end()) {
8736           bool Found = false;
8737           for (auto &Data : It->second) {
8738             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8739               return MI.Components.back().getAssociatedDeclaration() == VD;
8740             });
8741             // If we found a map entry, signal that the pointer has to be
8742             // returned and move on to the next declaration.
8743             if (CI != Data.end()) {
8744               CI->ReturnDevicePointer = true;
8745               Found = true;
8746               break;
8747             }
8748           }
8749           if (Found)
8750             continue;
8751         }
8752 
8753         // We didn't find any match in our map information - generate a zero
8754         // size array section - if the pointer is a struct member we defer this
8755         // action until the whole struct has been processed.
8756         if (isa<MemberExpr>(IE)) {
8757           // Insert the pointer into Info to be processed by
8758           // generateInfoForComponentList. Because it is a member pointer
8759           // without a pointee, no entry will be generated for it, therefore
8760           // we need to generate one after the whole struct has been processed.
8761           // Nonetheless, generateInfoForComponentList must be called to take
8762           // the pointer into account for the calculation of the range of the
8763           // partial struct.
8764           InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8765                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8766                   nullptr, nullptr, /*ForDeviceAddr=*/true);
8767           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8768         } else {
8769           llvm::Value *Ptr;
8770           if (IE->isGLValue())
8771             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8772           else
8773             Ptr = CGF.EmitScalarExpr(IE);
8774           CombinedInfo.Exprs.push_back(VD);
8775           CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8776           CombinedInfo.Pointers.push_back(Ptr);
8777           CombinedInfo.Sizes.push_back(
8778               llvm::Constant::getNullValue(CGF.Int64Ty));
8779           CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8780           CombinedInfo.Mappers.push_back(nullptr);
8781         }
8782       }
8783     }
8784 
8785     for (const auto &Data : Info) {
8786       StructRangeInfoTy PartialStruct;
8787       // Temporary generated information.
8788       MapCombinedInfoTy CurInfo;
8789       const Decl *D = Data.first;
8790       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8791       for (const auto &M : Data.second) {
8792         for (const MapInfo &L : M) {
8793           assert(!L.Components.empty() &&
8794                  "Not expecting declaration with no component lists.");
8795 
8796           // Remember the current base pointer index.
8797           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8798           CurInfo.NonContigInfo.IsNonContiguous =
8799               L.Components.back().isNonContiguous();
8800           generateInfoForComponentList(
8801               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8802               CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8803               L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8804 
8805           // If this entry relates with a device pointer, set the relevant
8806           // declaration and add the 'return pointer' flag.
8807           if (L.ReturnDevicePointer) {
8808             assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8809                    "Unexpected number of mapped base pointers.");
8810 
8811             const ValueDecl *RelevantVD =
8812                 L.Components.back().getAssociatedDeclaration();
8813             assert(RelevantVD &&
8814                    "No relevant declaration related with device pointer??");
8815 
8816             CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8817                 RelevantVD);
8818             CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8819           }
8820         }
8821       }
8822 
8823       // Append any pending zero-length pointers which are struct members and
8824       // used with use_device_ptr or use_device_addr.
8825       auto CI = DeferredInfo.find(Data.first);
8826       if (CI != DeferredInfo.end()) {
8827         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8828           llvm::Value *BasePtr;
8829           llvm::Value *Ptr;
8830           if (L.ForDeviceAddr) {
8831             if (L.IE->isGLValue())
8832               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8833             else
8834               Ptr = this->CGF.EmitScalarExpr(L.IE);
8835             BasePtr = Ptr;
8836             // Entry is RETURN_PARAM. Also, set the placeholder value
8837             // MEMBER_OF=FFFF so that the entry is later updated with the
8838             // correct value of MEMBER_OF.
8839             CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8840           } else {
8841             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8842             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8843                                              L.IE->getExprLoc());
8844             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8845             // placeholder value MEMBER_OF=FFFF so that the entry is later
8846             // updated with the correct value of MEMBER_OF.
8847             CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8848                                     OMP_MAP_MEMBER_OF);
8849           }
8850           CurInfo.Exprs.push_back(L.VD);
8851           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8852           CurInfo.Pointers.push_back(Ptr);
8853           CurInfo.Sizes.push_back(
8854               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8855           CurInfo.Mappers.push_back(nullptr);
8856         }
8857       }
8858       // If there is an entry in PartialStruct it means we have a struct with
8859       // individual members mapped. Emit an extra combined entry.
8860       if (PartialStruct.Base.isValid()) {
8861         CurInfo.NonContigInfo.Dims.push_back(0);
8862         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8863       }
8864 
8865       // We need to append the results of this capture to what we already
8866       // have.
8867       CombinedInfo.append(CurInfo);
8868     }
8869     // Append data for use_device_ptr clauses.
8870     CombinedInfo.append(UseDevicePtrCombinedInfo);
8871   }
8872 
8873 public:
8874   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8875       : CurDir(&Dir), CGF(CGF) {
8876     // Extract firstprivate clause information.
8877     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8878       for (const auto *D : C->varlists())
8879         FirstPrivateDecls.try_emplace(
8880             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8881     // Extract implicit firstprivates from uses_allocators clauses.
8882     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8883       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8884         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8885         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8886           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8887                                         /*Implicit=*/true);
8888         else if (const auto *VD = dyn_cast<VarDecl>(
8889                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8890                          ->getDecl()))
8891           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8892       }
8893     }
8894     // Extract device pointer clause information.
8895     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8896       for (auto L : C->component_lists())
8897         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8898     // Extract map information.
8899     for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8900       if (C->getMapType() != OMPC_MAP_to)
8901         continue;
8902       for (auto L : C->component_lists()) {
8903         const ValueDecl *VD = std::get<0>(L);
8904         const auto *RD = VD ? VD->getType()
8905                                   .getCanonicalType()
8906                                   .getNonReferenceType()
8907                                   ->getAsCXXRecordDecl()
8908                             : nullptr;
8909         if (RD && RD->isLambda())
8910           LambdasMap.try_emplace(std::get<0>(L), C);
8911       }
8912     }
8913   }
8914 
8915   /// Constructor for the declare mapper directive.
8916   MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8917       : CurDir(&Dir), CGF(CGF) {}
8918 
8919   /// Generate code for the combined entry if we have a partially mapped struct
8920   /// and take care of the mapping flags of the arguments corresponding to
8921   /// individual struct members.
8922   void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8923                          MapFlagsArrayTy &CurTypes,
8924                          const StructRangeInfoTy &PartialStruct,
8925                          const ValueDecl *VD = nullptr,
8926                          bool NotTargetParams = true) const {
8927     if (CurTypes.size() == 1 &&
8928         ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
8929         !PartialStruct.IsArraySection)
8930       return;
8931     Address LBAddr = PartialStruct.LowestElem.second;
8932     Address HBAddr = PartialStruct.HighestElem.second;
8933     if (PartialStruct.HasCompleteRecord) {
8934       LBAddr = PartialStruct.LB;
8935       HBAddr = PartialStruct.LB;
8936     }
8937     CombinedInfo.Exprs.push_back(VD);
8938     // Base is the base of the struct
8939     CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8940     // Pointer is the address of the lowest element
8941     llvm::Value *LB = LBAddr.getPointer();
8942     CombinedInfo.Pointers.push_back(LB);
8943     // There should not be a mapper for a combined entry.
8944     CombinedInfo.Mappers.push_back(nullptr);
8945     // Size is (addr of {highest+1} element) - (addr of lowest element)
8946     llvm::Value *HB = HBAddr.getPointer();
8947     llvm::Value *HAddr =
8948         CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
8949     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8950     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8951     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8952     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8953                                                   /*isSigned=*/false);
8954     CombinedInfo.Sizes.push_back(Size);
8955     // Map type is always TARGET_PARAM, if generate info for captures.
8956     CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
8957                                                  : OMP_MAP_TARGET_PARAM);
8958     // If any element has the present modifier, then make sure the runtime
8959     // doesn't attempt to allocate the struct.
8960     if (CurTypes.end() !=
8961         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8962           return Type & OMP_MAP_PRESENT;
8963         }))
8964       CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
8965     // Remove TARGET_PARAM flag from the first element
8966     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
8967     // If any element has the ompx_hold modifier, then make sure the runtime
8968     // uses the hold reference count for the struct as a whole so that it won't
8969     // be unmapped by an extra dynamic reference count decrement.  Add it to all
8970     // elements as well so the runtime knows which reference count to check
8971     // when determining whether it's time for device-to-host transfers of
8972     // individual elements.
8973     if (CurTypes.end() !=
8974         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8975           return Type & OMP_MAP_OMPX_HOLD;
8976         })) {
8977       CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
8978       for (auto &M : CurTypes)
8979         M |= OMP_MAP_OMPX_HOLD;
8980     }
8981 
8982     // All other current entries will be MEMBER_OF the combined entry
8983     // (except for PTR_AND_OBJ entries which do not have a placeholder value
8984     // 0xFFFF in the MEMBER_OF field).
8985     OpenMPOffloadMappingFlags MemberOfFlag =
8986         getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8987     for (auto &M : CurTypes)
8988       setCorrectMemberOfFlag(M, MemberOfFlag);
8989   }
8990 
8991   /// Generate all the base pointers, section pointers, sizes, map types, and
8992   /// mappers for the extracted mappable expressions (all included in \a
8993   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8994   /// pair of the relevant declaration and index where it occurs is appended to
8995   /// the device pointers info array.
8996   void generateAllInfo(
8997       MapCombinedInfoTy &CombinedInfo,
8998       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8999           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9000     assert(CurDir.is<const OMPExecutableDirective *>() &&
9001            "Expect a executable directive");
9002     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9003     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
9004   }
9005 
9006   /// Generate all the base pointers, section pointers, sizes, map types, and
9007   /// mappers for the extracted map clauses of user-defined mapper (all included
9008   /// in \a CombinedInfo).
9009   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
9010     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
9011            "Expect a declare mapper directive");
9012     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
9013     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
9014   }
9015 
9016   /// Emit capture info for lambdas for variables captured by reference.
9017   void generateInfoForLambdaCaptures(
9018       const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9019       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
9020     QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
9021     const auto *RD = VDType->getAsCXXRecordDecl();
9022     if (!RD || !RD->isLambda())
9023       return;
9024     Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
9025                    CGF.getContext().getDeclAlign(VD));
9026     LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
9027     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
9028     FieldDecl *ThisCapture = nullptr;
9029     RD->getCaptureFields(Captures, ThisCapture);
9030     if (ThisCapture) {
9031       LValue ThisLVal =
9032           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
9033       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
9034       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
9035                                  VDLVal.getPointer(CGF));
9036       CombinedInfo.Exprs.push_back(VD);
9037       CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
9038       CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
9039       CombinedInfo.Sizes.push_back(
9040           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
9041                                     CGF.Int64Ty, /*isSigned=*/true));
9042       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9043                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9044       CombinedInfo.Mappers.push_back(nullptr);
9045     }
9046     for (const LambdaCapture &LC : RD->captures()) {
9047       if (!LC.capturesVariable())
9048         continue;
9049       const VarDecl *VD = LC.getCapturedVar();
9050       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
9051         continue;
9052       auto It = Captures.find(VD);
9053       assert(It != Captures.end() && "Found lambda capture without field.");
9054       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
9055       if (LC.getCaptureKind() == LCK_ByRef) {
9056         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
9057         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9058                                    VDLVal.getPointer(CGF));
9059         CombinedInfo.Exprs.push_back(VD);
9060         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9061         CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
9062         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9063             CGF.getTypeSize(
9064                 VD->getType().getCanonicalType().getNonReferenceType()),
9065             CGF.Int64Ty, /*isSigned=*/true));
9066       } else {
9067         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
9068         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9069                                    VDLVal.getPointer(CGF));
9070         CombinedInfo.Exprs.push_back(VD);
9071         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9072         CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
9073         CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
9074       }
9075       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9076                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9077       CombinedInfo.Mappers.push_back(nullptr);
9078     }
9079   }
9080 
9081   /// Set correct indices for lambdas captures.
9082   void adjustMemberOfForLambdaCaptures(
9083       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9084       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9085       MapFlagsArrayTy &Types) const {
9086     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9087       // Set correct member_of idx for all implicit lambda captures.
9088       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9089                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9090         continue;
9091       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9092       assert(BasePtr && "Unable to find base lambda address.");
9093       int TgtIdx = -1;
9094       for (unsigned J = I; J > 0; --J) {
9095         unsigned Idx = J - 1;
9096         if (Pointers[Idx] != BasePtr)
9097           continue;
9098         TgtIdx = Idx;
9099         break;
9100       }
9101       assert(TgtIdx != -1 && "Unable to find parent lambda.");
9102       // All other current entries will be MEMBER_OF the combined entry
9103       // (except for PTR_AND_OBJ entries which do not have a placeholder value
9104       // 0xFFFF in the MEMBER_OF field).
9105       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9106       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9107     }
9108   }
9109 
9110   /// Generate the base pointers, section pointers, sizes, map types, and
9111   /// mappers associated to a given capture (all included in \a CombinedInfo).
9112   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
9113                               llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9114                               StructRangeInfoTy &PartialStruct) const {
9115     assert(!Cap->capturesVariableArrayType() &&
9116            "Not expecting to generate map info for a variable array type!");
9117 
9118     // We need to know when we generating information for the first component
9119     const ValueDecl *VD = Cap->capturesThis()
9120                               ? nullptr
9121                               : Cap->getCapturedVar()->getCanonicalDecl();
9122 
9123     // for map(to: lambda): skip here, processing it in
9124     // generateDefaultMapInfo
9125     if (LambdasMap.count(VD))
9126       return;
9127 
9128     // If this declaration appears in a is_device_ptr clause we just have to
9129     // pass the pointer by value. If it is a reference to a declaration, we just
9130     // pass its value.
9131     if (DevPointersMap.count(VD)) {
9132       CombinedInfo.Exprs.push_back(VD);
9133       CombinedInfo.BasePointers.emplace_back(Arg, VD);
9134       CombinedInfo.Pointers.push_back(Arg);
9135       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9136           CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9137           /*isSigned=*/true));
9138       CombinedInfo.Types.push_back(
9139           (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
9140           OMP_MAP_TARGET_PARAM);
9141       CombinedInfo.Mappers.push_back(nullptr);
9142       return;
9143     }
9144 
9145     using MapData =
9146         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
9147                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
9148                    const ValueDecl *, const Expr *>;
9149     SmallVector<MapData, 4> DeclComponentLists;
9150     assert(CurDir.is<const OMPExecutableDirective *>() &&
9151            "Expect a executable directive");
9152     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9153     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9154       const auto *EI = C->getVarRefs().begin();
9155       for (const auto L : C->decl_component_lists(VD)) {
9156         const ValueDecl *VDecl, *Mapper;
9157         // The Expression is not correct if the mapping is implicit
9158         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9159         OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9160         std::tie(VDecl, Components, Mapper) = L;
9161         assert(VDecl == VD && "We got information for the wrong declaration??");
9162         assert(!Components.empty() &&
9163                "Not expecting declaration with no component lists.");
9164         DeclComponentLists.emplace_back(Components, C->getMapType(),
9165                                         C->getMapTypeModifiers(),
9166                                         C->isImplicit(), Mapper, E);
9167         ++EI;
9168       }
9169     }
9170     llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9171                                              const MapData &RHS) {
9172       ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9173       OpenMPMapClauseKind MapType = std::get<1>(RHS);
9174       bool HasPresent =
9175           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9176       bool HasAllocs = MapType == OMPC_MAP_alloc;
9177       MapModifiers = std::get<2>(RHS);
9178       MapType = std::get<1>(LHS);
9179       bool HasPresentR =
9180           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9181       bool HasAllocsR = MapType == OMPC_MAP_alloc;
9182       return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9183     });
9184 
9185     // Find overlapping elements (including the offset from the base element).
9186     llvm::SmallDenseMap<
9187         const MapData *,
9188         llvm::SmallVector<
9189             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9190         4>
9191         OverlappedData;
9192     size_t Count = 0;
9193     for (const MapData &L : DeclComponentLists) {
9194       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9195       OpenMPMapClauseKind MapType;
9196       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9197       bool IsImplicit;
9198       const ValueDecl *Mapper;
9199       const Expr *VarRef;
9200       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9201           L;
9202       ++Count;
9203       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
9204         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9205         std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9206                  VarRef) = L1;
9207         auto CI = Components.rbegin();
9208         auto CE = Components.rend();
9209         auto SI = Components1.rbegin();
9210         auto SE = Components1.rend();
9211         for (; CI != CE && SI != SE; ++CI, ++SI) {
9212           if (CI->getAssociatedExpression()->getStmtClass() !=
9213               SI->getAssociatedExpression()->getStmtClass())
9214             break;
9215           // Are we dealing with different variables/fields?
9216           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9217             break;
9218         }
9219         // Found overlapping if, at least for one component, reached the head
9220         // of the components list.
9221         if (CI == CE || SI == SE) {
9222           // Ignore it if it is the same component.
9223           if (CI == CE && SI == SE)
9224             continue;
9225           const auto It = (SI == SE) ? CI : SI;
9226           // If one component is a pointer and another one is a kind of
9227           // dereference of this pointer (array subscript, section, dereference,
9228           // etc.), it is not an overlapping.
9229           // Same, if one component is a base and another component is a
9230           // dereferenced pointer memberexpr with the same base.
9231           if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9232               (std::prev(It)->getAssociatedDeclaration() &&
9233                std::prev(It)
9234                    ->getAssociatedDeclaration()
9235                    ->getType()
9236                    ->isPointerType()) ||
9237               (It->getAssociatedDeclaration() &&
9238                It->getAssociatedDeclaration()->getType()->isPointerType() &&
9239                std::next(It) != CE && std::next(It) != SE))
9240             continue;
9241           const MapData &BaseData = CI == CE ? L : L1;
9242           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9243               SI == SE ? Components : Components1;
9244           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
9245           OverlappedElements.getSecond().push_back(SubData);
9246         }
9247       }
9248     }
9249     // Sort the overlapped elements for each item.
9250     llvm::SmallVector<const FieldDecl *, 4> Layout;
9251     if (!OverlappedData.empty()) {
9252       const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9253       const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9254       while (BaseType != OrigType) {
9255         BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9256         OrigType = BaseType->getPointeeOrArrayElementType();
9257       }
9258 
9259       if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9260         getPlainLayout(CRD, Layout, /*AsBase=*/false);
9261       else {
9262         const auto *RD = BaseType->getAsRecordDecl();
9263         Layout.append(RD->field_begin(), RD->field_end());
9264       }
9265     }
9266     for (auto &Pair : OverlappedData) {
9267       llvm::stable_sort(
9268           Pair.getSecond(),
9269           [&Layout](
9270               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9271               OMPClauseMappableExprCommon::MappableExprComponentListRef
9272                   Second) {
9273             auto CI = First.rbegin();
9274             auto CE = First.rend();
9275             auto SI = Second.rbegin();
9276             auto SE = Second.rend();
9277             for (; CI != CE && SI != SE; ++CI, ++SI) {
9278               if (CI->getAssociatedExpression()->getStmtClass() !=
9279                   SI->getAssociatedExpression()->getStmtClass())
9280                 break;
9281               // Are we dealing with different variables/fields?
9282               if (CI->getAssociatedDeclaration() !=
9283                   SI->getAssociatedDeclaration())
9284                 break;
9285             }
9286 
9287             // Lists contain the same elements.
9288             if (CI == CE && SI == SE)
9289               return false;
9290 
9291             // List with less elements is less than list with more elements.
9292             if (CI == CE || SI == SE)
9293               return CI == CE;
9294 
9295             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9296             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9297             if (FD1->getParent() == FD2->getParent())
9298               return FD1->getFieldIndex() < FD2->getFieldIndex();
9299             const auto *It =
9300                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9301                   return FD == FD1 || FD == FD2;
9302                 });
9303             return *It == FD1;
9304           });
9305     }
9306 
9307     // Associated with a capture, because the mapping flags depend on it.
9308     // Go through all of the elements with the overlapped elements.
9309     bool IsFirstComponentList = true;
9310     for (const auto &Pair : OverlappedData) {
9311       const MapData &L = *Pair.getFirst();
9312       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9313       OpenMPMapClauseKind MapType;
9314       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9315       bool IsImplicit;
9316       const ValueDecl *Mapper;
9317       const Expr *VarRef;
9318       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9319           L;
9320       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9321           OverlappedComponents = Pair.getSecond();
9322       generateInfoForComponentList(
9323           MapType, MapModifiers, llvm::None, Components, CombinedInfo,
9324           PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
9325           /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
9326       IsFirstComponentList = false;
9327     }
9328     // Go through other elements without overlapped elements.
9329     for (const MapData &L : DeclComponentLists) {
9330       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9331       OpenMPMapClauseKind MapType;
9332       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9333       bool IsImplicit;
9334       const ValueDecl *Mapper;
9335       const Expr *VarRef;
9336       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9337           L;
9338       auto It = OverlappedData.find(&L);
9339       if (It == OverlappedData.end())
9340         generateInfoForComponentList(MapType, MapModifiers, llvm::None,
9341                                      Components, CombinedInfo, PartialStruct,
9342                                      IsFirstComponentList, IsImplicit, Mapper,
9343                                      /*ForDeviceAddr=*/false, VD, VarRef);
9344       IsFirstComponentList = false;
9345     }
9346   }
9347 
9348   /// Generate the default map information for a given capture \a CI,
9349   /// record field declaration \a RI and captured value \a CV.
9350   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
9351                               const FieldDecl &RI, llvm::Value *CV,
9352                               MapCombinedInfoTy &CombinedInfo) const {
9353     bool IsImplicit = true;
9354     // Do the default mapping.
9355     if (CI.capturesThis()) {
9356       CombinedInfo.Exprs.push_back(nullptr);
9357       CombinedInfo.BasePointers.push_back(CV);
9358       CombinedInfo.Pointers.push_back(CV);
9359       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
9360       CombinedInfo.Sizes.push_back(
9361           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
9362                                     CGF.Int64Ty, /*isSigned=*/true));
9363       // Default map type.
9364       CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
9365     } else if (CI.capturesVariableByCopy()) {
9366       const VarDecl *VD = CI.getCapturedVar();
9367       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9368       CombinedInfo.BasePointers.push_back(CV);
9369       CombinedInfo.Pointers.push_back(CV);
9370       if (!RI.getType()->isAnyPointerType()) {
9371         // We have to signal to the runtime captures passed by value that are
9372         // not pointers.
9373         CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
9374         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9375             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
9376       } else {
9377         // Pointers are implicitly mapped with a zero size and no flags
9378         // (other than first map that is added for all implicit maps).
9379         CombinedInfo.Types.push_back(OMP_MAP_NONE);
9380         CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9381       }
9382       auto I = FirstPrivateDecls.find(VD);
9383       if (I != FirstPrivateDecls.end())
9384         IsImplicit = I->getSecond();
9385     } else {
9386       assert(CI.capturesVariable() && "Expected captured reference.");
9387       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9388       QualType ElementType = PtrTy->getPointeeType();
9389       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9390           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9391       // The default map type for a scalar/complex type is 'to' because by
9392       // default the value doesn't have to be retrieved. For an aggregate
9393       // type, the default is 'tofrom'.
9394       CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
9395       const VarDecl *VD = CI.getCapturedVar();
9396       auto I = FirstPrivateDecls.find(VD);
9397       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9398       CombinedInfo.BasePointers.push_back(CV);
9399       if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9400         Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9401             CV, ElementType, CGF.getContext().getDeclAlign(VD),
9402             AlignmentSource::Decl));
9403         CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
9404       } else {
9405         CombinedInfo.Pointers.push_back(CV);
9406       }
9407       if (I != FirstPrivateDecls.end())
9408         IsImplicit = I->getSecond();
9409     }
9410     // Every default map produces a single argument which is a target parameter.
9411     CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
9412 
9413     // Add flag stating this is an implicit map.
9414     if (IsImplicit)
9415       CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
9416 
9417     // No user-defined mapper for default mapping.
9418     CombinedInfo.Mappers.push_back(nullptr);
9419   }
9420 };
9421 } // anonymous namespace
9422 
9423 static void emitNonContiguousDescriptor(
9424     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9425     CGOpenMPRuntime::TargetDataInfo &Info) {
9426   CodeGenModule &CGM = CGF.CGM;
9427   MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
9428       &NonContigInfo = CombinedInfo.NonContigInfo;
9429 
9430   // Build an array of struct descriptor_dim and then assign it to
9431   // offload_args.
9432   //
9433   // struct descriptor_dim {
9434   //  uint64_t offset;
9435   //  uint64_t count;
9436   //  uint64_t stride
9437   // };
9438   ASTContext &C = CGF.getContext();
9439   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9440   RecordDecl *RD;
9441   RD = C.buildImplicitRecord("descriptor_dim");
9442   RD->startDefinition();
9443   addFieldToRecordDecl(C, RD, Int64Ty);
9444   addFieldToRecordDecl(C, RD, Int64Ty);
9445   addFieldToRecordDecl(C, RD, Int64Ty);
9446   RD->completeDefinition();
9447   QualType DimTy = C.getRecordType(RD);
9448 
9449   enum { OffsetFD = 0, CountFD, StrideFD };
9450   // We need two index variable here since the size of "Dims" is the same as the
9451   // size of Components, however, the size of offset, count, and stride is equal
9452   // to the size of base declaration that is non-contiguous.
9453   for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9454     // Skip emitting ir if dimension size is 1 since it cannot be
9455     // non-contiguous.
9456     if (NonContigInfo.Dims[I] == 1)
9457       continue;
9458     llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
9459     QualType ArrayTy =
9460         C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
9461     Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
9462     for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
9463       unsigned RevIdx = EE - II - 1;
9464       LValue DimsLVal = CGF.MakeAddrLValue(
9465           CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
9466       // Offset
9467       LValue OffsetLVal = CGF.EmitLValueForField(
9468           DimsLVal, *std::next(RD->field_begin(), OffsetFD));
9469       CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
9470       // Count
9471       LValue CountLVal = CGF.EmitLValueForField(
9472           DimsLVal, *std::next(RD->field_begin(), CountFD));
9473       CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
9474       // Stride
9475       LValue StrideLVal = CGF.EmitLValueForField(
9476           DimsLVal, *std::next(RD->field_begin(), StrideFD));
9477       CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
9478     }
9479     // args[I] = &dims
9480     Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9481         DimsAddr, CGM.Int8PtrTy, CGM.Int8Ty);
9482     llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9483         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9484         Info.PointersArray, 0, I);
9485     Address PAddr(P, CGM.VoidPtrTy, CGF.getPointerAlign());
9486     CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
9487     ++L;
9488   }
9489 }
9490 
9491 // Try to extract the base declaration from a `this->x` expression if possible.
9492 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9493   if (!E)
9494     return nullptr;
9495 
9496   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
9497     if (const MemberExpr *ME =
9498             dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9499       return ME->getMemberDecl();
9500   return nullptr;
9501 }
9502 
9503 /// Emit a string constant containing the names of the values mapped to the
9504 /// offloading runtime library.
9505 llvm::Constant *
9506 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9507                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9508 
9509   uint32_t SrcLocStrSize;
9510   if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9511     return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
9512 
9513   SourceLocation Loc;
9514   if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9515     if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9516       Loc = VD->getLocation();
9517     else
9518       Loc = MapExprs.getMapExpr()->getExprLoc();
9519   } else {
9520     Loc = MapExprs.getMapDecl()->getLocation();
9521   }
9522 
9523   std::string ExprName;
9524   if (MapExprs.getMapExpr()) {
9525     PrintingPolicy P(CGF.getContext().getLangOpts());
9526     llvm::raw_string_ostream OS(ExprName);
9527     MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9528     OS.flush();
9529   } else {
9530     ExprName = MapExprs.getMapDecl()->getNameAsString();
9531   }
9532 
9533   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9534   return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
9535                                          PLoc.getLine(), PLoc.getColumn(),
9536                                          SrcLocStrSize);
9537 }
9538 
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library, and record them in \a Info.
///
/// \param CGF           Function in which the temporaries and stores are
///                      emitted.
/// \param CombinedInfo  Parallel lists (base pointers, pointers, sizes, map
///                      types, mappers, expressions) describing each mapped
///                      item; entry I of every list belongs to the same item.
/// \param Info          Receives the emitted arrays (BasePointersArray,
///                      PointersArray, SizesArray, MapTypesArray,
///                      MapNamesArray, MappersArray, and optionally
///                      MapTypesArrayEnd), NumberOfPtrs and HasMapper.
/// \param OMPBuilder    Used to create the map-type and map-name globals.
/// \param IsNonContiguous  When true, entries flagged OMP_MAP_NON_CONTIG get
///                      their dimension count as "size", and the dimension
///                      descriptors are emitted at the end.
///
/// If there is no map or capture information (no base pointers), nothing is
/// emitted and \a Info is left as Info.clearArrayInfo() set it.
/// NOTE(review): the original comment said "return nullptr by reference",
/// which presumably relies on clearArrayInfo() nulling the array fields -
/// confirm against TargetDataInfo.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // All pointer-typed arrays below share one AST type: void *[NumberOfPtrs].

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // Temporaries for the base pointers, pointers and mappers; they are
    // filled element by element in the loop at the end of this block.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    // ConstSizes holds the compile-time sizes (0 where unknown); RuntimeSizes
    // marks the entries whose size must be stored at run time.
    SmallVector<llvm::Constant *> ConstSizes(
        CombinedInfo.Sizes.size(), llvm::ConstantInt::get(CGF.Int64Ty, 0));
    llvm::SmallBitVector RuntimeSizes(CombinedInfo.Sizes.size());
    // Detect which sizes require runtime evaluation. A size counts as
    // constant only if it is an llvm::Constant that is neither a constant
    // expression nor a global value.
    for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
      if (auto *CI = dyn_cast<llvm::Constant>(CombinedInfo.Sizes[I])) {
        if (!isa<llvm::ConstantExpr>(CI) && !isa<llvm::GlobalValue>(CI)) {
          // For non-contiguous entries the "size" slot carries the recorded
          // dimension count instead of the size value.
          if (IsNonContiguous && (CombinedInfo.Types[I] &
                                  MappableExprsHandler::OMP_MAP_NON_CONTIG))
            ConstSizes[I] = llvm::ConstantInt::get(
                CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]);
          else
            ConstSizes[I] = CI;
          continue;
        }
      }
      RuntimeSizes.set(I);
    }

    if (RuntimeSizes.all()) {
      // Every size is computed at run time: a plain temporary is enough.
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // At least one size is constant: emit a private constant global with
      // the known sizes.
      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage, SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      if (RuntimeSizes.any()) {
        // Mixed case: copy the constant global into a temporary so that the
        // runtime-sized entries can be overwritten in the loop below.
        QualType SizeArrayType = Ctx.getConstantArrayType(
            Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
            /*IndexTypeQuals=*/0);
        Address Buffer = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes");
        llvm::Value *GblConstPtr =
            CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                SizesArrayGbl, CGM.Int64Ty->getPointerTo());
        CGF.Builder.CreateMemCpy(
            Buffer,
            Address(GblConstPtr, CGM.Int64Ty,
                    CGM.getNaturalTypeAlignment(Ctx.getIntTypeForBitwidth(
                        /*DestWidth=*/64, /*Signed=*/false))),
            CGF.getTypeSize(SizeArrayType));
        Info.SizesArray = Buffer.getPointer();
      } else {
        // All sizes are constant: use the global directly.
        Info.SizesArray = SizesArrayGbl;
      }
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The map names are only built if debug information is requested;
    // otherwise a null i8* is recorded instead.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      // One source-location string per mapped expression.
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      // Mapping is modified in place; it is not needed in its original form
      // after MapTypesArray has been created above.
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Fill the per-item slots of the base-pointer, pointer, size (runtime
    // entries only) and mapper arrays.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      // Store the base pointer through a slot pointer cast to the value's own
      // type.
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, BPVal->getType(),
                     Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Remember where the base pointer of a device-pointer declaration was
      // stored, when the caller asked for that information.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      // Same scheme for the pointer itself.
      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, PVal->getType(), Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Only sizes that are not compile-time constants need a store; constant
      // ones are already in the initializer emitted above.
      if (RuntimeSizes.test(I)) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, CGM.Int64Ty, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array: a null pointer unless the item has a
      // user-defined mapper, in which case its mapper function is stored and
      // Info.HasMapper is set.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  // The dimension descriptors are only needed when non-contiguous handling
  // was requested, offsets were recorded, and there is at least one item.
  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}
9729 
namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  /// Selects the map types meant for the end of the region instead of the
  /// ones meant for its beginning.
  bool ForEndCall;

  /// Doubles as the default constructor (ForEndCall is false then) and is
  /// deliberately implicit so that a bare or braced bool converts to an
  /// options object.
  ArgumentsOptions(bool ForEndCall = false) : ForEndCall(ForEndCall) {}
};
} // namespace
9738 
9739 /// Emit the arguments to be passed to the runtime library based on the
9740 /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
9741 /// ForEndCall, emit map types to be passed for the end of the region instead of
9742 /// the beginning.
9743 static void emitOffloadingArraysArgument(
9744     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
9745     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
9746     llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
9747     llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
9748     const ArgumentsOptions &Options = ArgumentsOptions()) {
9749   assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
9750          "expected region end call to runtime only when end call is separate");
9751   CodeGenModule &CGM = CGF.CGM;
9752   if (Info.NumberOfPtrs) {
9753     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9754         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9755         Info.BasePointersArray,
9756         /*Idx0=*/0, /*Idx1=*/0);
9757     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9758         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9759         Info.PointersArray,
9760         /*Idx0=*/0,
9761         /*Idx1=*/0);
9762     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9763         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9764         /*Idx0=*/0, /*Idx1=*/0);
9765     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9766         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9767         Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
9768                                                     : Info.MapTypesArray,
9769         /*Idx0=*/0,
9770         /*Idx1=*/0);
9771 
9772     // Only emit the mapper information arrays if debug information is
9773     // requested.
9774     if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9775       MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9776     else
9777       MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9778           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9779           Info.MapNamesArray,
9780           /*Idx0=*/0,
9781           /*Idx1=*/0);
9782     // If there is no user-defined mapper, set the mapper array to nullptr to
9783     // avoid an unnecessary data privatization
9784     if (!Info.HasMapper)
9785       MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9786     else
9787       MappersArrayArg =
9788           CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
9789   } else {
9790     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9791     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9792     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9793     MapTypesArrayArg =
9794         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9795     MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9796     MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9797   }
9798 }
9799 
9800 /// Check for inner distribute directive.
9801 static const OMPExecutableDirective *
9802 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9803   const auto *CS = D.getInnermostCapturedStmt();
9804   const auto *Body =
9805       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9806   const Stmt *ChildStmt =
9807       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9808 
9809   if (const auto *NestedDir =
9810           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9811     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9812     switch (D.getDirectiveKind()) {
9813     case OMPD_target:
9814       if (isOpenMPDistributeDirective(DKind))
9815         return NestedDir;
9816       if (DKind == OMPD_teams) {
9817         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9818             /*IgnoreCaptured=*/true);
9819         if (!Body)
9820           return nullptr;
9821         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9822         if (const auto *NND =
9823                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9824           DKind = NND->getDirectiveKind();
9825           if (isOpenMPDistributeDirective(DKind))
9826             return NND;
9827         }
9828       }
9829       return nullptr;
9830     case OMPD_target_teams:
9831       if (isOpenMPDistributeDirective(DKind))
9832         return NestedDir;
9833       return nullptr;
9834     case OMPD_target_parallel:
9835     case OMPD_target_simd:
9836     case OMPD_target_parallel_for:
9837     case OMPD_target_parallel_for_simd:
9838       return nullptr;
9839     case OMPD_target_teams_distribute:
9840     case OMPD_target_teams_distribute_simd:
9841     case OMPD_target_teams_distribute_parallel_for:
9842     case OMPD_target_teams_distribute_parallel_for_simd:
9843     case OMPD_parallel:
9844     case OMPD_for:
9845     case OMPD_parallel_for:
9846     case OMPD_parallel_master:
9847     case OMPD_parallel_sections:
9848     case OMPD_for_simd:
9849     case OMPD_parallel_for_simd:
9850     case OMPD_cancel:
9851     case OMPD_cancellation_point:
9852     case OMPD_ordered:
9853     case OMPD_threadprivate:
9854     case OMPD_allocate:
9855     case OMPD_task:
9856     case OMPD_simd:
9857     case OMPD_tile:
9858     case OMPD_unroll:
9859     case OMPD_sections:
9860     case OMPD_section:
9861     case OMPD_single:
9862     case OMPD_master:
9863     case OMPD_critical:
9864     case OMPD_taskyield:
9865     case OMPD_barrier:
9866     case OMPD_taskwait:
9867     case OMPD_taskgroup:
9868     case OMPD_atomic:
9869     case OMPD_flush:
9870     case OMPD_depobj:
9871     case OMPD_scan:
9872     case OMPD_teams:
9873     case OMPD_target_data:
9874     case OMPD_target_exit_data:
9875     case OMPD_target_enter_data:
9876     case OMPD_distribute:
9877     case OMPD_distribute_simd:
9878     case OMPD_distribute_parallel_for:
9879     case OMPD_distribute_parallel_for_simd:
9880     case OMPD_teams_distribute:
9881     case OMPD_teams_distribute_simd:
9882     case OMPD_teams_distribute_parallel_for:
9883     case OMPD_teams_distribute_parallel_for_simd:
9884     case OMPD_target_update:
9885     case OMPD_declare_simd:
9886     case OMPD_declare_variant:
9887     case OMPD_begin_declare_variant:
9888     case OMPD_end_declare_variant:
9889     case OMPD_declare_target:
9890     case OMPD_end_declare_target:
9891     case OMPD_declare_reduction:
9892     case OMPD_declare_mapper:
9893     case OMPD_taskloop:
9894     case OMPD_taskloop_simd:
9895     case OMPD_master_taskloop:
9896     case OMPD_master_taskloop_simd:
9897     case OMPD_parallel_master_taskloop:
9898     case OMPD_parallel_master_taskloop_simd:
9899     case OMPD_requires:
9900     case OMPD_metadirective:
9901     case OMPD_unknown:
9902     default:
9903       llvm_unreachable("Unexpected directive.");
9904     }
9905   }
9906 
9907   return nullptr;
9908 }
9909 
9910 /// Emit the user-defined mapper function. The code generation follows the
9911 /// pattern in the example below.
9912 /// \code
9913 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9914 ///                                           void *base, void *begin,
9915 ///                                           int64_t size, int64_t type,
9916 ///                                           void *name = nullptr) {
9917 ///   // Allocate space for an array section first or add a base/begin for
9918 ///   // pointer dereference.
9919 ///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9920 ///       !maptype.IsDelete)
9921 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9922 ///                                 size*sizeof(Ty), clearToFromMember(type));
9923 ///   // Map members.
9924 ///   for (unsigned i = 0; i < size; i++) {
9925 ///     // For each component specified by this mapper:
9926 ///     for (auto c : begin[i]->all_components) {
9927 ///       if (c.hasMapper())
9928 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9929 ///                       c.arg_type, c.arg_name);
9930 ///       else
9931 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9932 ///                                     c.arg_begin, c.arg_size, c.arg_type,
9933 ///                                     c.arg_name);
9934 ///     }
9935 ///   }
9936 ///   // Delete the array section.
9937 ///   if (size > 1 && maptype.IsDelete)
9938 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9939 ///                                 size*sizeof(Ty), clearToFromMember(type));
9940 /// }
9941 /// \endcode
9942 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9943                                             CodeGenFunction *CGF) {
9944   if (UDMMap.count(D) > 0)
9945     return;
9946   ASTContext &C = CGM.getContext();
9947   QualType Ty = D->getType();
9948   QualType PtrTy = C.getPointerType(Ty).withRestrict();
9949   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9950   auto *MapperVarDecl =
9951       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9952   SourceLocation Loc = D->getLocation();
9953   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9954   llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
9955 
9956   // Prepare mapper function arguments and attributes.
9957   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9958                               C.VoidPtrTy, ImplicitParamDecl::Other);
9959   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9960                             ImplicitParamDecl::Other);
9961   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9962                              C.VoidPtrTy, ImplicitParamDecl::Other);
9963   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9964                             ImplicitParamDecl::Other);
9965   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9966                             ImplicitParamDecl::Other);
9967   ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9968                             ImplicitParamDecl::Other);
9969   FunctionArgList Args;
9970   Args.push_back(&HandleArg);
9971   Args.push_back(&BaseArg);
9972   Args.push_back(&BeginArg);
9973   Args.push_back(&SizeArg);
9974   Args.push_back(&TypeArg);
9975   Args.push_back(&NameArg);
9976   const CGFunctionInfo &FnInfo =
9977       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9978   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9979   SmallString<64> TyStr;
9980   llvm::raw_svector_ostream Out(TyStr);
9981   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
9982   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9983   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9984                                     Name, &CGM.getModule());
9985   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9986   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9987   // Start the mapper function code generation.
9988   CodeGenFunction MapperCGF(CGM);
9989   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9990   // Compute the starting and end addresses of array elements.
9991   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9992       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9993       C.getPointerType(Int64Ty), Loc);
9994   // Prepare common arguments for array initiation and deletion.
9995   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9996       MapperCGF.GetAddrOfLocalVar(&HandleArg),
9997       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9998   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9999       MapperCGF.GetAddrOfLocalVar(&BaseArg),
10000       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
10001   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
10002       MapperCGF.GetAddrOfLocalVar(&BeginArg),
10003       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
10004   // Convert the size in bytes into the number of array elements.
10005   Size = MapperCGF.Builder.CreateExactUDiv(
10006       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
10007   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
10008       BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
10009   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
10010   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
10011       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
10012       C.getPointerType(Int64Ty), Loc);
10013   llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
10014       MapperCGF.GetAddrOfLocalVar(&NameArg),
10015       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
10016 
10017   // Emit array initiation if this is an array section and \p MapType indicates
10018   // that memory allocation is required.
10019   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
10020   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
10021                              MapName, ElementSize, HeadBB, /*IsInit=*/true);
10022 
10023   // Emit a for loop to iterate through SizeArg of elements and map all of them.
10024 
10025   // Emit the loop header block.
10026   MapperCGF.EmitBlock(HeadBB);
10027   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
10028   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
10029   // Evaluate whether the initial condition is satisfied.
10030   llvm::Value *IsEmpty =
10031       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
10032   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
10033   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
10034 
10035   // Emit the loop body block.
10036   MapperCGF.EmitBlock(BodyBB);
10037   llvm::BasicBlock *LastBB = BodyBB;
10038   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
10039       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
10040   PtrPHI->addIncoming(PtrBegin, EntryBB);
10041   Address PtrCurrent(PtrPHI, ElemTy,
10042                      MapperCGF.GetAddrOfLocalVar(&BeginArg)
10043                          .getAlignment()
10044                          .alignmentOfArrayElement(ElementSize));
10045   // Privatize the declared variable of mapper to be the current array element.
10046   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
10047   Scope.addPrivate(MapperVarDecl, PtrCurrent);
10048   (void)Scope.Privatize();
10049 
10050   // Get map clause information. Fill up the arrays with all mapped variables.
10051   MappableExprsHandler::MapCombinedInfoTy Info;
10052   MappableExprsHandler MEHandler(*D, MapperCGF);
10053   MEHandler.generateAllInfoForMapper(Info);
10054 
10055   // Call the runtime API __tgt_mapper_num_components to get the number of
10056   // pre-existing components.
10057   llvm::Value *OffloadingArgs[] = {Handle};
10058   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
10059       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
10060                                             OMPRTL___tgt_mapper_num_components),
10061       OffloadingArgs);
10062   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
10063       PreviousSize,
10064       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
10065 
10066   // Fill up the runtime mapper handle for all components.
10067   for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
10068     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
10069         *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
10070     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
10071         Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
10072     llvm::Value *CurSizeArg = Info.Sizes[I];
10073     llvm::Value *CurNameArg =
10074         (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
10075             ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
10076             : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
10077 
10078     // Extract the MEMBER_OF field from the map type.
10079     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
10080     llvm::Value *MemberMapType =
10081         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
10082 
10083     // Combine the map type inherited from user-defined mapper with that
10084     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
10085     // bits of the \a MapType, which is the input argument of the mapper
10086     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
10087     // bits of MemberMapType.
10088     // [OpenMP 5.0], 1.2.6. map-type decay.
10089     //        | alloc |  to   | from  | tofrom | release | delete
10090     // ----------------------------------------------------------
10091     // alloc  | alloc | alloc | alloc | alloc  | release | delete
10092     // to     | alloc |  to   | alloc |   to   | release | delete
10093     // from   | alloc | alloc | from  |  from  | release | delete
10094     // tofrom | alloc |  to   | from  | tofrom | release | delete
10095     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
10096         MapType,
10097         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
10098                                    MappableExprsHandler::OMP_MAP_FROM));
10099     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
10100     llvm::BasicBlock *AllocElseBB =
10101         MapperCGF.createBasicBlock("omp.type.alloc.else");
10102     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
10103     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
10104     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
10105     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
10106     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
10107     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
10108     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
10109     MapperCGF.EmitBlock(AllocBB);
10110     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
10111         MemberMapType,
10112         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
10113                                      MappableExprsHandler::OMP_MAP_FROM)));
10114     MapperCGF.Builder.CreateBr(EndBB);
10115     MapperCGF.EmitBlock(AllocElseBB);
10116     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
10117         LeftToFrom,
10118         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
10119     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
10120     // In case of to, clear OMP_MAP_FROM.
10121     MapperCGF.EmitBlock(ToBB);
10122     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
10123         MemberMapType,
10124         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
10125     MapperCGF.Builder.CreateBr(EndBB);
10126     MapperCGF.EmitBlock(ToElseBB);
10127     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
10128         LeftToFrom,
10129         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
10130     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
10131     // In case of from, clear OMP_MAP_TO.
10132     MapperCGF.EmitBlock(FromBB);
10133     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
10134         MemberMapType,
10135         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
10136     // In case of tofrom, do nothing.
10137     MapperCGF.EmitBlock(EndBB);
10138     LastBB = EndBB;
10139     llvm::PHINode *CurMapType =
10140         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
10141     CurMapType->addIncoming(AllocMapType, AllocBB);
10142     CurMapType->addIncoming(ToMapType, ToBB);
10143     CurMapType->addIncoming(FromMapType, FromBB);
10144     CurMapType->addIncoming(MemberMapType, ToElseBB);
10145 
10146     llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
10147                                      CurSizeArg, CurMapType, CurNameArg};
10148     if (Info.Mappers[I]) {
10149       // Call the corresponding mapper function.
10150       llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
10151           cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
10152       assert(MapperFunc && "Expect a valid mapper function is available.");
10153       MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
10154     } else {
10155       // Call the runtime API __tgt_push_mapper_component to fill up the runtime
10156       // data structure.
10157       MapperCGF.EmitRuntimeCall(
10158           OMPBuilder.getOrCreateRuntimeFunction(
10159               CGM.getModule(), OMPRTL___tgt_push_mapper_component),
10160           OffloadingArgs);
10161     }
10162   }
10163 
10164   // Update the pointer to point to the next element that needs to be mapped,
10165   // and check whether we have mapped all elements.
10166   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
10167       ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
10168   PtrPHI->addIncoming(PtrNext, LastBB);
10169   llvm::Value *IsDone =
10170       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
10171   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
10172   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
10173 
10174   MapperCGF.EmitBlock(ExitBB);
10175   // Emit array deletion if this is an array section and \p MapType indicates
10176   // that deletion is required.
10177   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
10178                              MapName, ElementSize, DoneBB, /*IsInit=*/false);
10179 
10180   // Emit the function exit block.
10181   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
10182   MapperCGF.FinishFunction();
10183   UDMMap.try_emplace(D, Fn);
10184   if (CGF) {
10185     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
10186     Decls.second.push_back(D);
10187   }
10188 }
10189 
10190 /// Emit the array initialization or deletion portion for user-defined mapper
10191 /// code generation. First, it evaluates whether an array section is mapped and
10192 /// whether the \a MapType instructs to delete this section. If \a IsInit is
10193 /// true, and \a MapType indicates to not delete this array, array
10194 /// initialization code is generated. If \a IsInit is false, and \a MapType
10195 /// indicates to not this array, array deletion code is generated.
10196 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
10197     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
10198     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
10199     llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
10200     bool IsInit) {
10201   StringRef Prefix = IsInit ? ".init" : ".del";
10202 
10203   // Evaluate if this is an array section.
10204   llvm::BasicBlock *BodyBB =
10205       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
10206   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
10207       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
10208   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
10209       MapType,
10210       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
10211   llvm::Value *DeleteCond;
10212   llvm::Value *Cond;
10213   if (IsInit) {
10214     // base != begin?
10215     llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
10216     // IsPtrAndObj?
10217     llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
10218         MapType,
10219         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
10220     PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
10221     BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
10222     Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
10223     DeleteCond = MapperCGF.Builder.CreateIsNull(
10224         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
10225   } else {
10226     Cond = IsArray;
10227     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
10228         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
10229   }
10230   Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
10231   MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
10232 
10233   MapperCGF.EmitBlock(BodyBB);
10234   // Get the array size by multiplying element size and element number (i.e., \p
10235   // Size).
10236   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
10237       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
10238   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
10239   // memory allocation/deletion purpose only.
10240   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
10241       MapType,
10242       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
10243                                    MappableExprsHandler::OMP_MAP_FROM)));
10244   MapTypeArg = MapperCGF.Builder.CreateOr(
10245       MapTypeArg,
10246       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));
10247 
10248   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
10249   // data structure.
10250   llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
10251                                    ArraySize, MapTypeArg, MapName};
10252   MapperCGF.EmitRuntimeCall(
10253       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
10254                                             OMPRTL___tgt_push_mapper_component),
10255       OffloadingArgs);
10256 }
10257 
10258 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10259     const OMPDeclareMapperDecl *D) {
10260   auto I = UDMMap.find(D);
10261   if (I != UDMMap.end())
10262     return I->second;
10263   emitUserDefinedMapper(D);
10264   return UDMMap.lookup(D);
10265 }
10266 
10267 void CGOpenMPRuntime::emitTargetNumIterationsCall(
10268     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10269     llvm::Value *DeviceID,
10270     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10271                                      const OMPLoopDirective &D)>
10272         SizeEmitter) {
10273   OpenMPDirectiveKind Kind = D.getDirectiveKind();
10274   const OMPExecutableDirective *TD = &D;
10275   // Get nested teams distribute kind directive, if any.
10276   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
10277     TD = getNestedDistributeDirective(CGM.getContext(), D);
10278   if (!TD)
10279     return;
10280   const auto *LD = cast<OMPLoopDirective>(TD);
10281   auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
10282                                                          PrePostActionTy &) {
10283     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
10284       llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10285       llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
10286       CGF.EmitRuntimeCall(
10287           OMPBuilder.getOrCreateRuntimeFunction(
10288               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
10289           Args);
10290     }
10291   };
10292   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
10293 }
10294 
10295 void CGOpenMPRuntime::emitTargetCall(
10296     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10297     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
10298     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10299     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10300                                      const OMPLoopDirective &D)>
10301         SizeEmitter) {
10302   if (!CGF.HaveInsertPoint())
10303     return;
10304 
10305   const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsDevice &&
10306                                    CGM.getLangOpts().OpenMPOffloadMandatory;
10307 
10308   assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
10309 
10310   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10311                                  D.hasClausesOfKind<OMPNowaitClause>();
10312   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
10313   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
10314   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
10315                                             PrePostActionTy &) {
10316     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10317   };
10318   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
10319 
10320   CodeGenFunction::OMPTargetDataInfo InputInfo;
10321   llvm::Value *MapTypesArray = nullptr;
10322   llvm::Value *MapNamesArray = nullptr;
10323   // Generate code for the host fallback function.
10324   auto &&FallbackGen = [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask,
10325                         &CS, OffloadingMandatory](CodeGenFunction &CGF) {
10326     if (OffloadingMandatory) {
10327       CGF.Builder.CreateUnreachable();
10328     } else {
10329       if (RequiresOuterTask) {
10330         CapturedVars.clear();
10331         CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10332       }
10333       emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10334     }
10335   };
10336   // Fill up the pointer arrays and transfer execution to the device.
10337   auto &&ThenGen = [this, Device, OutlinedFnID, &D, &InputInfo, &MapTypesArray,
10338                     &MapNamesArray, SizeEmitter,
10339                     FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
10340     if (Device.getInt() == OMPC_DEVICE_ancestor) {
10341       // Reverse offloading is not supported, so just execute on the host.
10342       FallbackGen(CGF);
10343       return;
10344     }
10345 
10346     // On top of the arrays that were filled up, the target offloading call
10347     // takes as arguments the device id as well as the host pointer. The host
10348     // pointer is used by the runtime library to identify the current target
10349     // region, so it only has to be unique and not necessarily point to
10350     // anything. It could be the pointer to the outlined function that
10351     // implements the target region, but we aren't using that so that the
10352     // compiler doesn't need to keep that, and could therefore inline the host
10353     // function if proven worthwhile during optimization.
10354 
10355     // From this point on, we need to have an ID of the target region defined.
10356     assert(OutlinedFnID && "Invalid outlined function ID!");
10357     (void)OutlinedFnID;
10358 
10359     // Emit device ID if any.
10360     llvm::Value *DeviceID;
10361     if (Device.getPointer()) {
10362       assert((Device.getInt() == OMPC_DEVICE_unknown ||
10363               Device.getInt() == OMPC_DEVICE_device_num) &&
10364              "Expected device_num modifier.");
10365       llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10366       DeviceID =
10367           CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10368     } else {
10369       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10370     }
10371 
10372     // Emit the number of elements in the offloading arrays.
10373     llvm::Value *PointerNum =
10374         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10375 
10376     // Return value of the runtime offloading call.
10377     llvm::Value *Return;
10378 
10379     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
10380     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
10381 
10382     // Source location for the ident struct
10383     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10384 
10385     // Emit tripcount for the target loop-based directive.
10386     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
10387 
10388     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10389     // The target region is an outlined function launched by the runtime
10390     // via calls __tgt_target() or __tgt_target_teams().
10391     //
10392     // __tgt_target() launches a target region with one team and one thread,
10393     // executing a serial region.  This master thread may in turn launch
10394     // more threads within its team upon encountering a parallel region,
10395     // however, no additional teams can be launched on the device.
10396     //
10397     // __tgt_target_teams() launches a target region with one or more teams,
10398     // each with one or more threads.  This call is required for target
10399     // constructs such as:
10400     //  'target teams'
10401     //  'target' / 'teams'
10402     //  'target teams distribute parallel for'
10403     //  'target parallel'
10404     // and so on.
10405     //
10406     // Note that on the host and CPU targets, the runtime implementation of
10407     // these calls simply call the outlined function without forking threads.
10408     // The outlined functions themselves have runtime calls to
10409     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
10410     // the compiler in emitTeamsCall() and emitParallelCall().
10411     //
10412     // In contrast, on the NVPTX target, the implementation of
10413     // __tgt_target_teams() launches a GPU kernel with the requested number
10414     // of teams and threads so no additional calls to the runtime are required.
10415     if (NumTeams) {
10416       // If we have NumTeams defined this means that we have an enclosed teams
10417       // region. Therefore we also expect to have NumThreads defined. These two
10418       // values should be defined in the presence of a teams directive,
10419       // regardless of having any clauses associated. If the user is using teams
10420       // but no clauses, these two values will be the default that should be
10421       // passed to the runtime library - a 32-bit integer with the value zero.
10422       assert(NumThreads && "Thread limit expression should be available along "
10423                            "with number of teams.");
10424       SmallVector<llvm::Value *> OffloadingArgs = {
10425           RTLoc,
10426           DeviceID,
10427           OutlinedFnID,
10428           PointerNum,
10429           InputInfo.BasePointersArray.getPointer(),
10430           InputInfo.PointersArray.getPointer(),
10431           InputInfo.SizesArray.getPointer(),
10432           MapTypesArray,
10433           MapNamesArray,
10434           InputInfo.MappersArray.getPointer(),
10435           NumTeams,
10436           NumThreads};
10437       if (HasNowait) {
10438         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10439         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10440         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10441         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10442         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10443         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10444       }
10445       Return = CGF.EmitRuntimeCall(
10446           OMPBuilder.getOrCreateRuntimeFunction(
10447               CGM.getModule(), HasNowait
10448                                    ? OMPRTL___tgt_target_teams_nowait_mapper
10449                                    : OMPRTL___tgt_target_teams_mapper),
10450           OffloadingArgs);
10451     } else {
10452       SmallVector<llvm::Value *> OffloadingArgs = {
10453           RTLoc,
10454           DeviceID,
10455           OutlinedFnID,
10456           PointerNum,
10457           InputInfo.BasePointersArray.getPointer(),
10458           InputInfo.PointersArray.getPointer(),
10459           InputInfo.SizesArray.getPointer(),
10460           MapTypesArray,
10461           MapNamesArray,
10462           InputInfo.MappersArray.getPointer()};
10463       if (HasNowait) {
10464         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10465         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10466         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10467         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10468         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10469         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10470       }
10471       Return = CGF.EmitRuntimeCall(
10472           OMPBuilder.getOrCreateRuntimeFunction(
10473               CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
10474                                          : OMPRTL___tgt_target_mapper),
10475           OffloadingArgs);
10476     }
10477 
10478     // Check the error code and execute the host version if required.
10479     llvm::BasicBlock *OffloadFailedBlock =
10480         CGF.createBasicBlock("omp_offload.failed");
10481     llvm::BasicBlock *OffloadContBlock =
10482         CGF.createBasicBlock("omp_offload.cont");
10483     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
10484     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
10485 
10486     CGF.EmitBlock(OffloadFailedBlock);
10487     FallbackGen(CGF);
10488 
10489     CGF.EmitBranch(OffloadContBlock);
10490 
10491     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
10492   };
10493 
10494   // Notify that the host version must be executed.
10495   auto &&ElseGen = [FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
10496     FallbackGen(CGF);
10497   };
10498 
10499   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10500                           &MapNamesArray, &CapturedVars, RequiresOuterTask,
10501                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
10502     // Fill up the arrays with all the captured variables.
10503     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10504 
10505     // Get mappable expression information.
10506     MappableExprsHandler MEHandler(D, CGF);
10507     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10508     llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10509 
10510     auto RI = CS.getCapturedRecordDecl()->field_begin();
10511     auto *CV = CapturedVars.begin();
10512     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10513                                               CE = CS.capture_end();
10514          CI != CE; ++CI, ++RI, ++CV) {
10515       MappableExprsHandler::MapCombinedInfoTy CurInfo;
10516       MappableExprsHandler::StructRangeInfoTy PartialStruct;
10517 
10518       // VLA sizes are passed to the outlined region by copy and do not have map
10519       // information associated.
10520       if (CI->capturesVariableArrayType()) {
10521         CurInfo.Exprs.push_back(nullptr);
10522         CurInfo.BasePointers.push_back(*CV);
10523         CurInfo.Pointers.push_back(*CV);
10524         CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10525             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10526         // Copy to the device as an argument. No need to retrieve it.
10527         CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
10528                                 MappableExprsHandler::OMP_MAP_TARGET_PARAM |
10529                                 MappableExprsHandler::OMP_MAP_IMPLICIT);
10530         CurInfo.Mappers.push_back(nullptr);
10531       } else {
10532         // If we have any information in the map clause, we use it, otherwise we
10533         // just do a default mapping.
10534         MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
10535         if (!CI->capturesThis())
10536           MappedVarSet.insert(CI->getCapturedVar());
10537         else
10538           MappedVarSet.insert(nullptr);
10539         if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
10540           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10541         // Generate correct mapping for variables captured by reference in
10542         // lambdas.
10543         if (CI->capturesVariable())
10544           MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10545                                                   CurInfo, LambdaPointers);
10546       }
10547       // We expect to have at least an element of information for this capture.
10548       assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
10549              "Non-existing map pointer for capture!");
10550       assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10551              CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10552              CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10553              CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10554              "Inconsistent map information sizes!");
10555 
10556       // If there is an entry in PartialStruct it means we have a struct with
10557       // individual members mapped. Emit an extra combined entry.
10558       if (PartialStruct.Base.isValid()) {
10559         CombinedInfo.append(PartialStruct.PreliminaryMapData);
10560         MEHandler.emitCombinedEntry(
10561             CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
10562             !PartialStruct.PreliminaryMapData.BasePointers.empty());
10563       }
10564 
10565       // We need to append the results of this capture to what we already have.
10566       CombinedInfo.append(CurInfo);
10567     }
10568     // Adjust MEMBER_OF flags for the lambdas captures.
10569     MEHandler.adjustMemberOfForLambdaCaptures(
10570         LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
10571         CombinedInfo.Types);
10572     // Map any list items in a map clause that were not captures because they
10573     // weren't referenced within the construct.
10574     MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
10575 
10576     TargetDataInfo Info;
10577     // Fill up the arrays and create the arguments.
10578     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
10579     emitOffloadingArraysArgument(
10580         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
10581         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
10582         {/*ForEndCall=*/false});
10583 
10584     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10585     InputInfo.BasePointersArray =
10586         Address(Info.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10587     InputInfo.PointersArray =
10588         Address(Info.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10589     InputInfo.SizesArray =
10590         Address(Info.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10591     InputInfo.MappersArray =
10592         Address(Info.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10593     MapTypesArray = Info.MapTypesArray;
10594     MapNamesArray = Info.MapNamesArray;
10595     if (RequiresOuterTask)
10596       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10597     else
10598       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10599   };
10600 
10601   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10602                              CodeGenFunction &CGF, PrePostActionTy &) {
10603     if (RequiresOuterTask) {
10604       CodeGenFunction::OMPTargetDataInfo InputInfo;
10605       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10606     } else {
10607       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10608     }
10609   };
10610 
  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
10615   if (OutlinedFnID) {
10616     if (IfCond) {
10617       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10618     } else {
10619       RegionCodeGenTy ThenRCG(TargetThenGen);
10620       ThenRCG(CGF);
10621     }
10622   } else {
10623     RegionCodeGenTy ElseRCG(TargetElseGen);
10624     ElseRCG(CGF);
10625   }
10626 }
10627 
10628 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10629                                                     StringRef ParentName) {
10630   if (!S)
10631     return;
10632 
10633   // Codegen OMP target directives that offload compute to the device.
10634   bool RequiresDeviceCodegen =
10635       isa<OMPExecutableDirective>(S) &&
10636       isOpenMPTargetExecutionDirective(
10637           cast<OMPExecutableDirective>(S)->getDirectiveKind());
10638 
10639   if (RequiresDeviceCodegen) {
10640     const auto &E = *cast<OMPExecutableDirective>(S);
10641     unsigned DeviceID;
10642     unsigned FileID;
10643     unsigned Line;
10644     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
10645                              FileID, Line);
10646 
10647     // Is this a target region that should not be emitted as an entry point? If
10648     // so just signal we are done with this target region.
10649     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
10650                                                             ParentName, Line))
10651       return;
10652 
10653     switch (E.getDirectiveKind()) {
10654     case OMPD_target:
10655       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
10656                                                    cast<OMPTargetDirective>(E));
10657       break;
10658     case OMPD_target_parallel:
10659       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
10660           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
10661       break;
10662     case OMPD_target_teams:
10663       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
10664           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
10665       break;
10666     case OMPD_target_teams_distribute:
10667       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
10668           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
10669       break;
10670     case OMPD_target_teams_distribute_simd:
10671       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
10672           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
10673       break;
10674     case OMPD_target_parallel_for:
10675       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
10676           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
10677       break;
10678     case OMPD_target_parallel_for_simd:
10679       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
10680           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
10681       break;
10682     case OMPD_target_simd:
10683       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
10684           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
10685       break;
10686     case OMPD_target_teams_distribute_parallel_for:
10687       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
10688           CGM, ParentName,
10689           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
10690       break;
10691     case OMPD_target_teams_distribute_parallel_for_simd:
10692       CodeGenFunction::
10693           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
10694               CGM, ParentName,
10695               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
10696       break;
10697     case OMPD_parallel:
10698     case OMPD_for:
10699     case OMPD_parallel_for:
10700     case OMPD_parallel_master:
10701     case OMPD_parallel_sections:
10702     case OMPD_for_simd:
10703     case OMPD_parallel_for_simd:
10704     case OMPD_cancel:
10705     case OMPD_cancellation_point:
10706     case OMPD_ordered:
10707     case OMPD_threadprivate:
10708     case OMPD_allocate:
10709     case OMPD_task:
10710     case OMPD_simd:
10711     case OMPD_tile:
10712     case OMPD_unroll:
10713     case OMPD_sections:
10714     case OMPD_section:
10715     case OMPD_single:
10716     case OMPD_master:
10717     case OMPD_critical:
10718     case OMPD_taskyield:
10719     case OMPD_barrier:
10720     case OMPD_taskwait:
10721     case OMPD_taskgroup:
10722     case OMPD_atomic:
10723     case OMPD_flush:
10724     case OMPD_depobj:
10725     case OMPD_scan:
10726     case OMPD_teams:
10727     case OMPD_target_data:
10728     case OMPD_target_exit_data:
10729     case OMPD_target_enter_data:
10730     case OMPD_distribute:
10731     case OMPD_distribute_simd:
10732     case OMPD_distribute_parallel_for:
10733     case OMPD_distribute_parallel_for_simd:
10734     case OMPD_teams_distribute:
10735     case OMPD_teams_distribute_simd:
10736     case OMPD_teams_distribute_parallel_for:
10737     case OMPD_teams_distribute_parallel_for_simd:
10738     case OMPD_target_update:
10739     case OMPD_declare_simd:
10740     case OMPD_declare_variant:
10741     case OMPD_begin_declare_variant:
10742     case OMPD_end_declare_variant:
10743     case OMPD_declare_target:
10744     case OMPD_end_declare_target:
10745     case OMPD_declare_reduction:
10746     case OMPD_declare_mapper:
10747     case OMPD_taskloop:
10748     case OMPD_taskloop_simd:
10749     case OMPD_master_taskloop:
10750     case OMPD_master_taskloop_simd:
10751     case OMPD_parallel_master_taskloop:
10752     case OMPD_parallel_master_taskloop_simd:
10753     case OMPD_requires:
10754     case OMPD_metadirective:
10755     case OMPD_unknown:
10756     default:
10757       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10758     }
10759     return;
10760   }
10761 
10762   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10763     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10764       return;
10765 
10766     scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10767     return;
10768   }
10769 
10770   // If this is a lambda function, look into its body.
10771   if (const auto *L = dyn_cast<LambdaExpr>(S))
10772     S = L->getBody();
10773 
10774   // Keep looking for target regions recursively.
10775   for (const Stmt *II : S->children())
10776     scanForTargetRegionsFunctions(II, ParentName);
10777 }
10778 
10779 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10780   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10781       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10782   if (!DevTy)
10783     return false;
10784   // Do not emit device_type(nohost) functions for the host.
10785   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10786     return true;
10787   // Do not emit device_type(host) functions for the device.
10788   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10789     return true;
10790   return false;
10791 }
10792 
10793 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10794   // If emitting code for the host, we do not process FD here. Instead we do
10795   // the normal code generation.
10796   if (!CGM.getLangOpts().OpenMPIsDevice) {
10797     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10798       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10799                                   CGM.getLangOpts().OpenMPIsDevice))
10800         return true;
10801     return false;
10802   }
10803 
10804   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10805   // Try to detect target regions in the function.
10806   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10807     StringRef Name = CGM.getMangledName(GD);
10808     scanForTargetRegionsFunctions(FD->getBody(), Name);
10809     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10810                                 CGM.getLangOpts().OpenMPIsDevice))
10811       return true;
10812   }
10813 
10814   // Do not to emit function if it is not marked as declare target.
10815   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10816          AlreadyEmittedTargetDecls.count(VD) == 0;
10817 }
10818 
10819 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10820   if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10821                               CGM.getLangOpts().OpenMPIsDevice))
10822     return true;
10823 
10824   if (!CGM.getLangOpts().OpenMPIsDevice)
10825     return false;
10826 
10827   // Check if there are Ctors/Dtors in this declaration and look for target
10828   // regions in it. We use the complete variant to produce the kernel name
10829   // mangling.
10830   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10831   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10832     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10833       StringRef ParentName =
10834           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10835       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10836     }
10837     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10838       StringRef ParentName =
10839           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10840       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10841     }
10842   }
10843 
10844   // Do not to emit variable if it is not marked as declare target.
10845   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10846       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10847           cast<VarDecl>(GD.getDecl()));
10848   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10849       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10850        HasRequiresUnifiedSharedMemory)) {
10851     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10852     return true;
10853   }
10854   return false;
10855 }
10856 
10857 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10858                                                    llvm::Constant *Addr) {
10859   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10860       !CGM.getLangOpts().OpenMPIsDevice)
10861     return;
10862 
10863   // If we have host/nohost variables, they do not need to be registered.
10864   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10865       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10866   if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
10867     return;
10868 
10869   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10870       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10871   if (!Res) {
10872     if (CGM.getLangOpts().OpenMPIsDevice) {
10873       // Register non-target variables being emitted in device code (debug info
10874       // may cause this).
10875       StringRef VarName = CGM.getMangledName(VD);
10876       EmittedNonTargetVariables.try_emplace(VarName, Addr);
10877     }
10878     return;
10879   }
10880   // Register declare target variables.
10881   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10882   StringRef VarName;
10883   CharUnits VarSize;
10884   llvm::GlobalValue::LinkageTypes Linkage;
10885 
10886   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10887       !HasRequiresUnifiedSharedMemory) {
10888     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10889     VarName = CGM.getMangledName(VD);
10890     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10891       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10892       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10893     } else {
10894       VarSize = CharUnits::Zero();
10895     }
10896     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10897     // Temp solution to prevent optimizations of the internal variables.
10898     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10899       // Do not create a "ref-variable" if the original is not also available
10900       // on the host.
10901       if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
10902         return;
10903       std::string RefName = getName({VarName, "ref"});
10904       if (!CGM.GetGlobalValue(RefName)) {
10905         llvm::Constant *AddrRef =
10906             getOrCreateInternalVariable(Addr->getType(), RefName);
10907         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10908         GVAddrRef->setConstant(/*Val=*/true);
10909         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10910         GVAddrRef->setInitializer(Addr);
10911         CGM.addCompilerUsedGlobal(GVAddrRef);
10912       }
10913     }
10914   } else {
10915     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10916             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10917              HasRequiresUnifiedSharedMemory)) &&
10918            "Declare target attribute must link or to with unified memory.");
10919     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10920       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10921     else
10922       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10923 
10924     if (CGM.getLangOpts().OpenMPIsDevice) {
10925       VarName = Addr->getName();
10926       Addr = nullptr;
10927     } else {
10928       VarName = getAddrOfDeclareTargetVar(VD).getName();
10929       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10930     }
10931     VarSize = CGM.getPointerSize();
10932     Linkage = llvm::GlobalValue::WeakAnyLinkage;
10933   }
10934 
10935   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10936       VarName, Addr, VarSize, Flags, Linkage);
10937 }
10938 
10939 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10940   if (isa<FunctionDecl>(GD.getDecl()) ||
10941       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10942     return emitTargetFunctions(GD);
10943 
10944   return emitTargetGlobalVariable(GD);
10945 }
10946 
10947 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10948   for (const VarDecl *VD : DeferredGlobalVariables) {
10949     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10950         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10951     if (!Res)
10952       continue;
10953     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10954         !HasRequiresUnifiedSharedMemory) {
10955       CGM.EmitGlobal(VD);
10956     } else {
10957       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10958               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10959                HasRequiresUnifiedSharedMemory)) &&
10960              "Expected link clause or to clause with unified memory.");
10961       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10962     }
10963   }
10964 }
10965 
10966 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10967     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10968   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10969          " Expected target-based directive.");
10970 }
10971 
10972 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10973   for (const OMPClause *Clause : D->clauselists()) {
10974     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10975       HasRequiresUnifiedSharedMemory = true;
10976     } else if (const auto *AC =
10977                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10978       switch (AC->getAtomicDefaultMemOrderKind()) {
10979       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10980         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10981         break;
10982       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10983         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10984         break;
10985       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10986         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10987         break;
10988       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10989         break;
10990       }
10991     }
10992   }
10993 }
10994 
/// Returns the current value of RequiresAtomicOrdering, i.e. the ordering last
/// stored by processRequiresDirective for an atomic default memory order
/// clause, or whatever value the member held before any such clause was seen.
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10998 
10999 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
11000                                                        LangAS &AS) {
11001   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
11002     return false;
11003   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
11004   switch(A->getAllocatorType()) {
11005   case OMPAllocateDeclAttr::OMPNullMemAlloc:
11006   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
11007   // Not supported, fallback to the default mem space.
11008   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
11009   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
11010   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
11011   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
11012   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
11013   case OMPAllocateDeclAttr::OMPConstMemAlloc:
11014   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
11015     AS = LangAS::Default;
11016     return true;
11017   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
11018     llvm_unreachable("Expected predefined allocator for the variables with the "
11019                      "static storage.");
11020   }
11021   return false;
11022 }
11023 
/// Returns the HasRequiresUnifiedSharedMemory flag, which
/// processRequiresDirective sets to true when it encounters a
/// unified_shared_memory clause.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
11027 
11028 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
11029     CodeGenModule &CGM)
11030     : CGM(CGM) {
11031   if (CGM.getLangOpts().OpenMPIsDevice) {
11032     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
11033     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
11034   }
11035 }
11036 
11037 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
11038   if (CGM.getLangOpts().OpenMPIsDevice)
11039     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
11040 }
11041 
11042 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
11043   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
11044     return true;
11045 
11046   const auto *D = cast<FunctionDecl>(GD.getDecl());
11047   // Do not to emit function if it is marked as declare target as it was already
11048   // emitted.
11049   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
11050     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
11051       if (auto *F = dyn_cast_or_null<llvm::Function>(
11052               CGM.GetGlobalValue(CGM.getMangledName(GD))))
11053         return !F->isDeclaration();
11054       return false;
11055     }
11056     return true;
11057   }
11058 
11059   return !AlreadyEmittedTargetDecls.insert(D).second;
11060 }
11061 
11062 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
11063   // If we don't have entries or if we are emitting code for the device, we
11064   // don't need to do anything.
11065   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
11066       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
11067       (OffloadEntriesInfoManager.empty() &&
11068        !HasEmittedDeclareTargetRegion &&
11069        !HasEmittedTargetRegion))
11070     return nullptr;
11071 
11072   // Create and register the function that handles the requires directives.
11073   ASTContext &C = CGM.getContext();
11074 
11075   llvm::Function *RequiresRegFn;
11076   {
11077     CodeGenFunction CGF(CGM);
11078     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
11079     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
11080     std::string ReqName = getName({"omp_offloading", "requires_reg"});
11081     RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
11082     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
11083     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
11084     // TODO: check for other requires clauses.
11085     // The requires directive takes effect only when a target region is
11086     // present in the compilation unit. Otherwise it is ignored and not
11087     // passed to the runtime. This avoids the runtime from throwing an error
11088     // for mismatching requires clauses across compilation units that don't
11089     // contain at least 1 target region.
11090     assert((HasEmittedTargetRegion ||
11091             HasEmittedDeclareTargetRegion ||
11092             !OffloadEntriesInfoManager.empty()) &&
11093            "Target or declare target region expected.");
11094     if (HasRequiresUnifiedSharedMemory)
11095       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
11096     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11097                             CGM.getModule(), OMPRTL___tgt_register_requires),
11098                         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
11099     CGF.FinishFunction();
11100   }
11101   return RequiresRegFn;
11102 }
11103 
11104 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
11105                                     const OMPExecutableDirective &D,
11106                                     SourceLocation Loc,
11107                                     llvm::Function *OutlinedFn,
11108                                     ArrayRef<llvm::Value *> CapturedVars) {
11109   if (!CGF.HaveInsertPoint())
11110     return;
11111 
11112   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11113   CodeGenFunction::RunCleanupsScope Scope(CGF);
11114 
11115   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11116   llvm::Value *Args[] = {
11117       RTLoc,
11118       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11119       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
11120   llvm::SmallVector<llvm::Value *, 16> RealArgs;
11121   RealArgs.append(std::begin(Args), std::end(Args));
11122   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11123 
11124   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11125       CGM.getModule(), OMPRTL___kmpc_fork_teams);
11126   CGF.EmitRuntimeCall(RTLFn, RealArgs);
11127 }
11128 
11129 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11130                                          const Expr *NumTeams,
11131                                          const Expr *ThreadLimit,
11132                                          SourceLocation Loc) {
11133   if (!CGF.HaveInsertPoint())
11134     return;
11135 
11136   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11137 
11138   llvm::Value *NumTeamsVal =
11139       NumTeams
11140           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
11141                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11142           : CGF.Builder.getInt32(0);
11143 
11144   llvm::Value *ThreadLimitVal =
11145       ThreadLimit
11146           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11147                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11148           : CGF.Builder.getInt32(0);
11149 
11150   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
11151   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
11152                                      ThreadLimitVal};
11153   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11154                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
11155                       PushNumTeamsArgs);
11156 }
11157 
/// Emit the code for a target data region described by \p D: a call to
/// __tgt_target_data_begin_mapper, the region body produced by \p CodeGen, and
/// a call to __tgt_target_data_end_mapper.
///
/// \param CGF     Function being emitted; nothing happens without an insert
///                point.
/// \param D       Directive whose map clauses populate the offloading arrays
///                and whose begin location is used for the ident argument.
/// \param IfCond  Optional 'if' clause condition. When present, both runtime
///                calls are emitted through emitIfClause.
/// \param Device  Optional device expression; it is cast to a signed 64-bit
///                integer, and OMP_DEVICEID_UNDEF is passed when it is null.
/// \param CodeGen Generator for the body of the region.
/// \param Info    Offloading array information; filled in by the opening code
///                and read again when closing the region.
///
/// The body is emitted in one of two places, depending on whether
/// Info.CaptureDeviceAddrMap is empty after the opening code was generated:
/// if it is not empty, the body is emitted inside the opening code (and again,
/// with NoPrivAction, in the 'else' branch of the 'if' clause); if it is empty,
/// the body is emitted once between the two runtime calls with NoPrivAction.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    // Same argument setup as for the opening call, but requested for the end
    // call this time.
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info,
                                 {/*ForEndCall=*/true});

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Open the data environment, conditionally when an 'if' clause is present.
  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Close the data environment, under the same condition as the opening.
  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
11311 
/// Emits the offloading runtime call for a stand-alone target data directive
/// ('target enter data', 'target exit data' or 'target update').
///
/// \param CGF    Function being emitted. Nothing is emitted when it has no
///               insert point.
/// \param D      The directive; asserted to be one of the three kinds above.
/// \param IfCond Optional 'if' clause condition. When present, the whole
///               sequence is emitted through emitIfClause with an empty
///               'else' branch.
/// \param Device Optional 'device' clause expression. When absent,
///               OMP_DEVICEID_UNDEF is passed as the device id.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // State shared between the two lambdas below: TargetThenGen fills these in,
  // ThenGen reads them when building the runtime call arguments.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Emits the runtime call itself, using the arrays previously stored in
  // InputInfo, MapTypesArray and MapNamesArray.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Argument order: location, device id, item count, then the base
    // pointers, pointers, sizes, map types, map names and mappers arrays.
    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive. A 'nowait' clause selects the *_nowait_mapper variant.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // Every remaining directive kind is not a stand-alone target data
    // directive; all of them fall through to the llvm_unreachable below.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Builds the offloading arrays from the directive's map information, stores
  // them where ThenGen can find them, and then runs ThenGen.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    // A 'depend' or 'nowait' clause routes the call through
    // EmitOMPTargetTaskBasedDirective instead of emitting it inline.
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    // Note that the array fields of Info are passed both as the output
    // arguments and, through Info itself, as the input.
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});
    // Publish the results for ThenGen.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // With an 'if' clause nothing at all is emitted on the false branch.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
11492 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  ///
  /// - LinearWithVarStride: linear parameter whose step is another parameter
  ///   of the function.
  /// - Linear: linear parameter with a constant step.
  /// - Uniform: parameter listed in a 'uniform' clause.
  /// - Vector: any other parameter (the default for ParamAttrTy).
  ///
  /// Note that LinearWithVarStride is the first enumerator, so a
  /// value-initialized ParamKindTy (`{}`) is LinearWithVarStride, not Vector.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Classification of the parameter.
    ParamKindTy Kind = Vector;
    /// For Linear: the step. For LinearWithVarStride: the position of the
    /// parameter that holds the step. Not meaningful for the other kinds.
    llvm::APSInt StrideOrArg;
    /// Alignment from an 'aligned' clause; zero means no alignment is
    /// mangled for this parameter.
    llvm::APSInt Alignment;
  };
} // namespace
11503 
11504 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11505                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11506   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11507   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11508   // of that clause. The VLEN value must be power of 2.
11509   // In other case the notion of the function`s "characteristic data type" (CDT)
11510   // is used to compute the vector length.
11511   // CDT is defined in the following order:
11512   //   a) For non-void function, the CDT is the return type.
11513   //   b) If the function has any non-uniform, non-linear parameters, then the
11514   //   CDT is the type of the first such parameter.
11515   //   c) If the CDT determined by a) or b) above is struct, union, or class
11516   //   type which is pass-by-value (except for the type that maps to the
11517   //   built-in complex data type), the characteristic data type is int.
11518   //   d) If none of the above three cases is applicable, the CDT is int.
11519   // The VLEN is then determined based on the CDT and the size of vector
11520   // register of that ISA for which current vector version is generated. The
11521   // VLEN is computed using the formula below:
11522   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11523   // where vector register size specified in section 3.2.1 Registers and the
11524   // Stack Frame of original AMD64 ABI document.
11525   QualType RetType = FD->getReturnType();
11526   if (RetType.isNull())
11527     return 0;
11528   ASTContext &C = FD->getASTContext();
11529   QualType CDT;
11530   if (!RetType.isNull() && !RetType->isVoidType()) {
11531     CDT = RetType;
11532   } else {
11533     unsigned Offset = 0;
11534     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11535       if (ParamAttrs[Offset].Kind == Vector)
11536         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11537       ++Offset;
11538     }
11539     if (CDT.isNull()) {
11540       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11541         if (ParamAttrs[I + Offset].Kind == Vector) {
11542           CDT = FD->getParamDecl(I)->getType();
11543           break;
11544         }
11545       }
11546     }
11547   }
11548   if (CDT.isNull())
11549     CDT = C.IntTy;
11550   CDT = CDT->getCanonicalTypeUnqualified();
11551   if (CDT->isRecordType() || CDT->isUnionType())
11552     CDT = C.IntTy;
11553   return C.getTypeSize(CDT);
11554 }
11555 
11556 static void
11557 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11558                            const llvm::APSInt &VLENVal,
11559                            ArrayRef<ParamAttrTy> ParamAttrs,
11560                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11561   struct ISADataTy {
11562     char ISA;
11563     unsigned VecRegSize;
11564   };
11565   ISADataTy ISAData[] = {
11566       {
11567           'b', 128
11568       }, // SSE
11569       {
11570           'c', 256
11571       }, // AVX
11572       {
11573           'd', 256
11574       }, // AVX2
11575       {
11576           'e', 512
11577       }, // AVX512
11578   };
11579   llvm::SmallVector<char, 2> Masked;
11580   switch (State) {
11581   case OMPDeclareSimdDeclAttr::BS_Undefined:
11582     Masked.push_back('N');
11583     Masked.push_back('M');
11584     break;
11585   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11586     Masked.push_back('N');
11587     break;
11588   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11589     Masked.push_back('M');
11590     break;
11591   }
11592   for (char Mask : Masked) {
11593     for (const ISADataTy &Data : ISAData) {
11594       SmallString<256> Buffer;
11595       llvm::raw_svector_ostream Out(Buffer);
11596       Out << "_ZGV" << Data.ISA << Mask;
11597       if (!VLENVal) {
11598         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11599         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11600         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11601       } else {
11602         Out << VLENVal;
11603       }
11604       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11605         switch (ParamAttr.Kind){
11606         case LinearWithVarStride:
11607           Out << 's' << ParamAttr.StrideOrArg;
11608           break;
11609         case Linear:
11610           Out << 'l';
11611           if (ParamAttr.StrideOrArg != 1)
11612             Out << ParamAttr.StrideOrArg;
11613           break;
11614         case Uniform:
11615           Out << 'u';
11616           break;
11617         case Vector:
11618           Out << 'v';
11619           break;
11620         }
11621         if (!!ParamAttr.Alignment)
11622           Out << 'a' << ParamAttr.Alignment;
11623       }
11624       Out << '_' << Fn->getName();
11625       Fn->addFnAttr(Out.str());
11626     }
11627   }
11628 }
11629 
// These are the functions that are needed to mangle the name of the
// vector functions generated by the compiler, according to the rules
11632 // defined in the "Vector Function ABI specifications for AArch64",
11633 // available at
11634 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11635 
11636 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11637 ///
11638 /// TODO: Need to implement the behavior for reference marked with a
11639 /// var or no linear modifiers (1.b in the section). For this, we
11640 /// need to extend ParamKindTy to support the linear modifiers.
11641 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11642   QT = QT.getCanonicalType();
11643 
11644   if (QT->isVoidType())
11645     return false;
11646 
11647   if (Kind == ParamKindTy::Uniform)
11648     return false;
11649 
11650   if (Kind == ParamKindTy::Linear)
11651     return false;
11652 
11653   // TODO: Handle linear references with modifiers
11654 
11655   if (Kind == ParamKindTy::LinearWithVarStride)
11656     return false;
11657 
11658   return true;
11659 }
11660 
11661 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11662 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11663   QT = QT.getCanonicalType();
11664   unsigned Size = C.getTypeSize(QT);
11665 
11666   // Only scalars and complex within 16 bytes wide set PVB to true.
11667   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11668     return false;
11669 
11670   if (QT->isFloatingType())
11671     return true;
11672 
11673   if (QT->isIntegerType())
11674     return true;
11675 
11676   if (QT->isPointerType())
11677     return true;
11678 
11679   // TODO: Add support for complex types (section 3.1.2, item 2).
11680 
11681   return false;
11682 }
11683 
11684 /// Computes the lane size (LS) of a return type or of an input parameter,
11685 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11686 /// TODO: Add support for references, section 3.2.1, item 1.
11687 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11688   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11689     QualType PTy = QT.getCanonicalType()->getPointeeType();
11690     if (getAArch64PBV(PTy, C))
11691       return C.getTypeSize(PTy);
11692   }
11693   if (getAArch64PBV(QT, C))
11694     return C.getTypeSize(QT);
11695 
11696   return C.getTypeSize(C.getUIntPtrType());
11697 }
11698 
11699 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11700 // signature of the scalar function, as defined in 3.2.2 of the
11701 // AAVFABI.
11702 static std::tuple<unsigned, unsigned, bool>
11703 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11704   QualType RetType = FD->getReturnType().getCanonicalType();
11705 
11706   ASTContext &C = FD->getASTContext();
11707 
11708   bool OutputBecomesInput = false;
11709 
11710   llvm::SmallVector<unsigned, 8> Sizes;
11711   if (!RetType->isVoidType()) {
11712     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11713     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11714       OutputBecomesInput = true;
11715   }
11716   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11717     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11718     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11719   }
11720 
11721   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11722   // The LS of a function parameter / return value can only be a power
11723   // of 2, starting from 8 bits, up to 128.
11724   assert(llvm::all_of(Sizes,
11725                       [](unsigned Size) {
11726                         return Size == 8 || Size == 16 || Size == 32 ||
11727                                Size == 64 || Size == 128;
11728                       }) &&
11729          "Invalid size");
11730 
11731   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11732                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11733                          OutputBecomesInput);
11734 }
11735 
11736 /// Mangle the parameter part of the vector function name according to
11737 /// their OpenMP classification. The mangling function is defined in
11738 /// section 3.5 of the AAVFABI.
11739 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11740   SmallString<256> Buffer;
11741   llvm::raw_svector_ostream Out(Buffer);
11742   for (const auto &ParamAttr : ParamAttrs) {
11743     switch (ParamAttr.Kind) {
11744     case LinearWithVarStride:
11745       Out << "ls" << ParamAttr.StrideOrArg;
11746       break;
11747     case Linear:
11748       Out << 'l';
11749       // Don't print the step value if it is not present or if it is
11750       // equal to 1.
11751       if (ParamAttr.StrideOrArg != 1)
11752         Out << ParamAttr.StrideOrArg;
11753       break;
11754     case Uniform:
11755       Out << 'u';
11756       break;
11757     case Vector:
11758       Out << 'v';
11759       break;
11760     }
11761 
11762     if (!!ParamAttr.Alignment)
11763       Out << 'a' << ParamAttr.Alignment;
11764   }
11765 
11766   return std::string(Out.str());
11767 }
11768 
11769 // Function used to add the attribute. The parameter `VLEN` is
11770 // templated to allow the use of "x" when targeting scalable functions
11771 // for SVE.
11772 template <typename T>
11773 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11774                                  char ISA, StringRef ParSeq,
11775                                  StringRef MangledName, bool OutputBecomesInput,
11776                                  llvm::Function *Fn) {
11777   SmallString<256> Buffer;
11778   llvm::raw_svector_ostream Out(Buffer);
11779   Out << Prefix << ISA << LMask << VLEN;
11780   if (OutputBecomesInput)
11781     Out << "v";
11782   Out << ParSeq << "_" << MangledName;
11783   Fn->addFnAttr(Out.str());
11784 }
11785 
11786 // Helper function to generate the Advanced SIMD names depending on
11787 // the value of the NDS when simdlen is not present.
11788 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11789                                       StringRef Prefix, char ISA,
11790                                       StringRef ParSeq, StringRef MangledName,
11791                                       bool OutputBecomesInput,
11792                                       llvm::Function *Fn) {
11793   switch (NDS) {
11794   case 8:
11795     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11796                          OutputBecomesInput, Fn);
11797     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11798                          OutputBecomesInput, Fn);
11799     break;
11800   case 16:
11801     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11802                          OutputBecomesInput, Fn);
11803     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11804                          OutputBecomesInput, Fn);
11805     break;
11806   case 32:
11807     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11808                          OutputBecomesInput, Fn);
11809     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11810                          OutputBecomesInput, Fn);
11811     break;
11812   case 64:
11813   case 128:
11814     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11815                          OutputBecomesInput, Fn);
11816     break;
11817   default:
11818     llvm_unreachable("Scalar type is too wide.");
11819   }
11820 }
11821 
11822 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11823 static void emitAArch64DeclareSimdFunction(
11824     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11825     ArrayRef<ParamAttrTy> ParamAttrs,
11826     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11827     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11828 
11829   // Get basic data for building the vector signature.
11830   const auto Data = getNDSWDS(FD, ParamAttrs);
11831   const unsigned NDS = std::get<0>(Data);
11832   const unsigned WDS = std::get<1>(Data);
11833   const bool OutputBecomesInput = std::get<2>(Data);
11834 
11835   // Check the values provided via `simdlen` by the user.
11836   // 1. A `simdlen(1)` doesn't produce vector signatures,
11837   if (UserVLEN == 1) {
11838     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11839         DiagnosticsEngine::Warning,
11840         "The clause simdlen(1) has no effect when targeting aarch64.");
11841     CGM.getDiags().Report(SLoc, DiagID);
11842     return;
11843   }
11844 
11845   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11846   // Advanced SIMD output.
11847   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11848     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11849         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11850                                     "power of 2 when targeting Advanced SIMD.");
11851     CGM.getDiags().Report(SLoc, DiagID);
11852     return;
11853   }
11854 
11855   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
11856   // limits.
11857   if (ISA == 's' && UserVLEN != 0) {
11858     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11859       unsigned DiagID = CGM.getDiags().getCustomDiagID(
11860           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11861                                       "lanes in the architectural constraints "
11862                                       "for SVE (min is 128-bit, max is "
11863                                       "2048-bit, by steps of 128-bit)");
11864       CGM.getDiags().Report(SLoc, DiagID) << WDS;
11865       return;
11866     }
11867   }
11868 
11869   // Sort out parameter sequence.
11870   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11871   StringRef Prefix = "_ZGV";
11872   // Generate simdlen from user input (if any).
11873   if (UserVLEN) {
11874     if (ISA == 's') {
11875       // SVE generates only a masked function.
11876       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11877                            OutputBecomesInput, Fn);
11878     } else {
11879       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11880       // Advanced SIMD generates one or two functions, depending on
11881       // the `[not]inbranch` clause.
11882       switch (State) {
11883       case OMPDeclareSimdDeclAttr::BS_Undefined:
11884         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11885                              OutputBecomesInput, Fn);
11886         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11887                              OutputBecomesInput, Fn);
11888         break;
11889       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11890         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11891                              OutputBecomesInput, Fn);
11892         break;
11893       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11894         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11895                              OutputBecomesInput, Fn);
11896         break;
11897       }
11898     }
11899   } else {
11900     // If no user simdlen is provided, follow the AAVFABI rules for
11901     // generating the vector length.
11902     if (ISA == 's') {
11903       // SVE, section 3.4.1, item 1.
11904       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11905                            OutputBecomesInput, Fn);
11906     } else {
11907       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11908       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11909       // two vector names depending on the use of the clause
11910       // `[not]inbranch`.
11911       switch (State) {
11912       case OMPDeclareSimdDeclAttr::BS_Undefined:
11913         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11914                                   OutputBecomesInput, Fn);
11915         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11916                                   OutputBecomesInput, Fn);
11917         break;
11918       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11919         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11920                                   OutputBecomesInput, Fn);
11921         break;
11922       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11923         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11924                                   OutputBecomesInput, Fn);
11925         break;
11926       }
11927     }
11928   }
11929 }
11930 
11931 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11932                                               llvm::Function *Fn) {
11933   ASTContext &C = CGM.getContext();
11934   FD = FD->getMostRecentDecl();
11935   // Map params to their positions in function decl.
11936   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11937   if (isa<CXXMethodDecl>(FD))
11938     ParamPositions.try_emplace(FD, 0);
11939   unsigned ParamPos = ParamPositions.size();
11940   for (const ParmVarDecl *P : FD->parameters()) {
11941     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11942     ++ParamPos;
11943   }
11944   while (FD) {
11945     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11946       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11947       // Mark uniform parameters.
11948       for (const Expr *E : Attr->uniforms()) {
11949         E = E->IgnoreParenImpCasts();
11950         unsigned Pos;
11951         if (isa<CXXThisExpr>(E)) {
11952           Pos = ParamPositions[FD];
11953         } else {
11954           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11955                                 ->getCanonicalDecl();
11956           Pos = ParamPositions[PVD];
11957         }
11958         ParamAttrs[Pos].Kind = Uniform;
11959       }
11960       // Get alignment info.
11961       auto *NI = Attr->alignments_begin();
11962       for (const Expr *E : Attr->aligneds()) {
11963         E = E->IgnoreParenImpCasts();
11964         unsigned Pos;
11965         QualType ParmTy;
11966         if (isa<CXXThisExpr>(E)) {
11967           Pos = ParamPositions[FD];
11968           ParmTy = E->getType();
11969         } else {
11970           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11971                                 ->getCanonicalDecl();
11972           Pos = ParamPositions[PVD];
11973           ParmTy = PVD->getType();
11974         }
11975         ParamAttrs[Pos].Alignment =
11976             (*NI)
11977                 ? (*NI)->EvaluateKnownConstInt(C)
11978                 : llvm::APSInt::getUnsigned(
11979                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11980                           .getQuantity());
11981         ++NI;
11982       }
11983       // Mark linear parameters.
11984       auto *SI = Attr->steps_begin();
11985       auto *MI = Attr->modifiers_begin();
11986       for (const Expr *E : Attr->linears()) {
11987         E = E->IgnoreParenImpCasts();
11988         unsigned Pos;
11989         // Rescaling factor needed to compute the linear parameter
11990         // value in the mangled name.
11991         unsigned PtrRescalingFactor = 1;
11992         if (isa<CXXThisExpr>(E)) {
11993           Pos = ParamPositions[FD];
11994         } else {
11995           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11996                                 ->getCanonicalDecl();
11997           Pos = ParamPositions[PVD];
11998           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11999             PtrRescalingFactor = CGM.getContext()
12000                                      .getTypeSizeInChars(P->getPointeeType())
12001                                      .getQuantity();
12002         }
12003         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
12004         ParamAttr.Kind = Linear;
12005         // Assuming a stride of 1, for `linear` without modifiers.
12006         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
12007         if (*SI) {
12008           Expr::EvalResult Result;
12009           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
12010             if (const auto *DRE =
12011                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
12012               if (const auto *StridePVD =
12013                       dyn_cast<ParmVarDecl>(DRE->getDecl())) {
12014                 ParamAttr.Kind = LinearWithVarStride;
12015                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
12016                     ParamPositions[StridePVD->getCanonicalDecl()]);
12017               }
12018             }
12019           } else {
12020             ParamAttr.StrideOrArg = Result.Val.getInt();
12021           }
12022         }
12023         // If we are using a linear clause on a pointer, we need to
12024         // rescale the value of linear_step with the byte size of the
12025         // pointee type.
12026         if (Linear == ParamAttr.Kind)
12027           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
12028         ++SI;
12029         ++MI;
12030       }
12031       llvm::APSInt VLENVal;
12032       SourceLocation ExprLoc;
12033       const Expr *VLENExpr = Attr->getSimdlen();
12034       if (VLENExpr) {
12035         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
12036         ExprLoc = VLENExpr->getExprLoc();
12037       }
12038       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
12039       if (CGM.getTriple().isX86()) {
12040         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
12041       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
12042         unsigned VLEN = VLENVal.getExtValue();
12043         StringRef MangledName = Fn->getName();
12044         if (CGM.getTarget().hasFeature("sve"))
12045           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12046                                          MangledName, 's', 128, Fn, ExprLoc);
12047         if (CGM.getTarget().hasFeature("neon"))
12048           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12049                                          MangledName, 'n', 128, Fn, ExprLoc);
12050       }
12051     }
12052     FD = FD->getPreviousDecl();
12053   }
12054 }
12055 
12056 namespace {
12057 /// Cleanup action for doacross support.
12058 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
12059 public:
12060   static const int DoacrossFinArgs = 2;
12061 
12062 private:
12063   llvm::FunctionCallee RTLFn;
12064   llvm::Value *Args[DoacrossFinArgs];
12065 
12066 public:
12067   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
12068                     ArrayRef<llvm::Value *> CallArgs)
12069       : RTLFn(RTLFn) {
12070     assert(CallArgs.size() == DoacrossFinArgs);
12071     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
12072   }
12073   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12074     if (!CGF.HaveInsertPoint())
12075       return;
12076     CGF.EmitRuntimeCall(RTLFn, Args);
12077   }
12078 };
12079 } // namespace
12080 
12081 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12082                                        const OMPLoopDirective &D,
12083                                        ArrayRef<Expr *> NumIterations) {
12084   if (!CGF.HaveInsertPoint())
12085     return;
12086 
12087   ASTContext &C = CGM.getContext();
12088   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
12089   RecordDecl *RD;
12090   if (KmpDimTy.isNull()) {
12091     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
12092     //  kmp_int64 lo; // lower
12093     //  kmp_int64 up; // upper
12094     //  kmp_int64 st; // stride
12095     // };
12096     RD = C.buildImplicitRecord("kmp_dim");
12097     RD->startDefinition();
12098     addFieldToRecordDecl(C, RD, Int64Ty);
12099     addFieldToRecordDecl(C, RD, Int64Ty);
12100     addFieldToRecordDecl(C, RD, Int64Ty);
12101     RD->completeDefinition();
12102     KmpDimTy = C.getRecordType(RD);
12103   } else {
12104     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
12105   }
12106   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
12107   QualType ArrayTy =
12108       C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
12109 
12110   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
12111   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
12112   enum { LowerFD = 0, UpperFD, StrideFD };
12113   // Fill dims with data.
12114   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
12115     LValue DimsLVal = CGF.MakeAddrLValue(
12116         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
12117     // dims.upper = num_iterations;
12118     LValue UpperLVal = CGF.EmitLValueForField(
12119         DimsLVal, *std::next(RD->field_begin(), UpperFD));
12120     llvm::Value *NumIterVal = CGF.EmitScalarConversion(
12121         CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
12122         Int64Ty, NumIterations[I]->getExprLoc());
12123     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
12124     // dims.stride = 1;
12125     LValue StrideLVal = CGF.EmitLValueForField(
12126         DimsLVal, *std::next(RD->field_begin(), StrideFD));
12127     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
12128                           StrideLVal);
12129   }
12130 
12131   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
12132   // kmp_int32 num_dims, struct kmp_dim * dims);
12133   llvm::Value *Args[] = {
12134       emitUpdateLocation(CGF, D.getBeginLoc()),
12135       getThreadID(CGF, D.getBeginLoc()),
12136       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
12137       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12138           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
12139           CGM.VoidPtrTy)};
12140 
12141   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12142       CGM.getModule(), OMPRTL___kmpc_doacross_init);
12143   CGF.EmitRuntimeCall(RTLFn, Args);
12144   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
12145       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
12146   llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12147       CGM.getModule(), OMPRTL___kmpc_doacross_fini);
12148   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
12149                                              llvm::makeArrayRef(FiniArgs));
12150 }
12151 
12152 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12153                                           const OMPDependClause *C) {
12154   QualType Int64Ty =
12155       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
12156   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
12157   QualType ArrayTy = CGM.getContext().getConstantArrayType(
12158       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
12159   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
12160   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
12161     const Expr *CounterVal = C->getLoopData(I);
12162     assert(CounterVal);
12163     llvm::Value *CntVal = CGF.EmitScalarConversion(
12164         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
12165         CounterVal->getExprLoc());
12166     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
12167                           /*Volatile=*/false, Int64Ty);
12168   }
12169   llvm::Value *Args[] = {
12170       emitUpdateLocation(CGF, C->getBeginLoc()),
12171       getThreadID(CGF, C->getBeginLoc()),
12172       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
12173   llvm::FunctionCallee RTLFn;
12174   if (C->getDependencyKind() == OMPC_DEPEND_source) {
12175     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12176                                                   OMPRTL___kmpc_doacross_post);
12177   } else {
12178     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
12179     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12180                                                   OMPRTL___kmpc_doacross_wait);
12181   }
12182   CGF.EmitRuntimeCall(RTLFn, Args);
12183 }
12184 
12185 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12186                                llvm::FunctionCallee Callee,
12187                                ArrayRef<llvm::Value *> Args) const {
12188   assert(Loc.isValid() && "Outlined function call location must be valid.");
12189   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
12190 
12191   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12192     if (Fn->doesNotThrow()) {
12193       CGF.EmitNounwindRuntimeCall(Fn, Args);
12194       return;
12195     }
12196   }
12197   CGF.EmitRuntimeCall(Callee, Args);
12198 }
12199 
/// Emits a call to the outlined function \p OutlinedFn with arguments
/// \p Args at location \p Loc.
///
/// This implementation forwards all of its arguments unchanged to emitCall().
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
12205 
12206 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12207   if (const auto *FD = dyn_cast<FunctionDecl>(D))
12208     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12209       HasEmittedDeclareTargetRegion = true;
12210 }
12211 
/// Returns the address of the parameter \p NativeParam in \p CGF.
///
/// This implementation looks up the local variable address of \p NativeParam
/// only; \p TargetParam is not used here.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
12217 
12218 /// Return allocator value from expression, or return a null allocator (default
12219 /// when no allocator specified).
12220 static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
12221                                     const Expr *Allocator) {
12222   llvm::Value *AllocVal;
12223   if (Allocator) {
12224     AllocVal = CGF.EmitScalarExpr(Allocator);
12225     // According to the standard, the original allocator type is a enum
12226     // (integer). Convert to pointer type, if required.
12227     AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
12228                                         CGF.getContext().VoidPtrTy,
12229                                         Allocator->getExprLoc());
12230   } else {
12231     // If no allocator specified, it defaults to the null allocator.
12232     AllocVal = llvm::Constant::getNullValue(
12233         CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
12234   }
12235   return AllocVal;
12236 }
12237 
12238 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
12239                                                    const VarDecl *VD) {
12240   if (!VD)
12241     return Address::invalid();
12242   Address UntiedAddr = Address::invalid();
12243   Address UntiedRealAddr = Address::invalid();
12244   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12245   if (It != FunctionToUntiedTaskStackMap.end()) {
12246     const UntiedLocalVarsAddressesMap &UntiedData =
12247         UntiedLocalVarsStack[It->second];
12248     auto I = UntiedData.find(VD);
12249     if (I != UntiedData.end()) {
12250       UntiedAddr = I->second.first;
12251       UntiedRealAddr = I->second.second;
12252     }
12253   }
12254   const VarDecl *CVD = VD->getCanonicalDecl();
12255   if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
12256     // Use the default allocation.
12257     if (!isAllocatableDecl(VD))
12258       return UntiedAddr;
12259     llvm::Value *Size;
12260     CharUnits Align = CGM.getContext().getDeclAlign(CVD);
12261     if (CVD->getType()->isVariablyModifiedType()) {
12262       Size = CGF.getTypeSize(CVD->getType());
12263       // Align the size: ((size + align - 1) / align) * align
12264       Size = CGF.Builder.CreateNUWAdd(
12265           Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
12266       Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
12267       Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
12268     } else {
12269       CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
12270       Size = CGM.getSize(Sz.alignTo(Align));
12271     }
12272     llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
12273     const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
12274     const Expr *Allocator = AA->getAllocator();
12275     llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
12276     llvm::Value *Alignment =
12277         AA->getAlignment()
12278             ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(AA->getAlignment()),
12279                                         CGM.SizeTy, /*isSigned=*/false)
12280             : nullptr;
12281     SmallVector<llvm::Value *, 4> Args;
12282     Args.push_back(ThreadID);
12283     if (Alignment)
12284       Args.push_back(Alignment);
12285     Args.push_back(Size);
12286     Args.push_back(AllocVal);
12287     llvm::omp::RuntimeFunction FnID =
12288         Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
12289     llvm::Value *Addr = CGF.EmitRuntimeCall(
12290         OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
12291         getName({CVD->getName(), ".void.addr"}));
12292     llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12293         CGM.getModule(), OMPRTL___kmpc_free);
12294     QualType Ty = CGM.getContext().getPointerType(CVD->getType());
12295     Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12296         Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
12297     if (UntiedAddr.isValid())
12298       CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
12299 
12300     // Cleanup action for allocate support.
12301     class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
12302       llvm::FunctionCallee RTLFn;
12303       SourceLocation::UIntTy LocEncoding;
12304       Address Addr;
12305       const Expr *AllocExpr;
12306 
12307     public:
12308       OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
12309                            SourceLocation::UIntTy LocEncoding, Address Addr,
12310                            const Expr *AllocExpr)
12311           : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
12312             AllocExpr(AllocExpr) {}
12313       void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12314         if (!CGF.HaveInsertPoint())
12315           return;
12316         llvm::Value *Args[3];
12317         Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
12318             CGF, SourceLocation::getFromRawEncoding(LocEncoding));
12319         Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12320             Addr.getPointer(), CGF.VoidPtrTy);
12321         llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
12322         Args[2] = AllocVal;
12323         CGF.EmitRuntimeCall(RTLFn, Args);
12324       }
12325     };
12326     Address VDAddr =
12327         UntiedRealAddr.isValid()
12328             ? UntiedRealAddr
12329             : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
12330     CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
12331         NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
12332         VDAddr, Allocator);
12333     if (UntiedRealAddr.isValid())
12334       if (auto *Region =
12335               dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
12336         Region->emitUntiedSwitch(CGF);
12337     return VDAddr;
12338   }
12339   return UntiedAddr;
12340 }
12341 
12342 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12343                                              const VarDecl *VD) const {
12344   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12345   if (It == FunctionToUntiedTaskStackMap.end())
12346     return false;
12347   return UntiedLocalVarsStack[It->second].count(VD) > 0;
12348 }
12349 
12350 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12351     CodeGenModule &CGM, const OMPLoopDirective &S)
12352     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12353   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12354   if (!NeedToPush)
12355     return;
12356   NontemporalDeclsSet &DS =
12357       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12358   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12359     for (const Stmt *Ref : C->private_refs()) {
12360       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12361       const ValueDecl *VD;
12362       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12363         VD = DRE->getDecl();
12364       } else {
12365         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12366         assert((ME->isImplicitCXXThis() ||
12367                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12368                "Expected member of current class.");
12369         VD = ME->getMemberDecl();
12370       }
12371       DS.insert(VD);
12372     }
12373   }
12374 }
12375 
12376 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12377   if (!NeedToPush)
12378     return;
12379   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12380 }
12381 
12382 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12383     CodeGenFunction &CGF,
12384     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12385                           std::pair<Address, Address>> &LocalVars)
12386     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12387   if (!NeedToPush)
12388     return;
12389   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12390       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12391   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12392 }
12393 
12394 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12395   if (!NeedToPush)
12396     return;
12397   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12398 }
12399 
12400 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12401   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12402 
12403   return llvm::any_of(
12404       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12405       [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
12406 }
12407 
12408 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12409     const OMPExecutableDirective &S,
12410     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12411     const {
12412   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12413   // Vars in target/task regions must be excluded completely.
12414   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12415       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12416     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12417     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12418     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12419     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12420       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12421         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12422     }
12423   }
12424   // Exclude vars in private clauses.
12425   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12426     for (const Expr *Ref : C->varlists()) {
12427       if (!Ref->getType()->isScalarType())
12428         continue;
12429       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12430       if (!DRE)
12431         continue;
12432       NeedToCheckForLPCs.insert(DRE->getDecl());
12433     }
12434   }
12435   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12436     for (const Expr *Ref : C->varlists()) {
12437       if (!Ref->getType()->isScalarType())
12438         continue;
12439       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12440       if (!DRE)
12441         continue;
12442       NeedToCheckForLPCs.insert(DRE->getDecl());
12443     }
12444   }
12445   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12446     for (const Expr *Ref : C->varlists()) {
12447       if (!Ref->getType()->isScalarType())
12448         continue;
12449       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12450       if (!DRE)
12451         continue;
12452       NeedToCheckForLPCs.insert(DRE->getDecl());
12453     }
12454   }
12455   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12456     for (const Expr *Ref : C->varlists()) {
12457       if (!Ref->getType()->isScalarType())
12458         continue;
12459       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12460       if (!DRE)
12461         continue;
12462       NeedToCheckForLPCs.insert(DRE->getDecl());
12463     }
12464   }
12465   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12466     for (const Expr *Ref : C->varlists()) {
12467       if (!Ref->getType()->isScalarType())
12468         continue;
12469       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12470       if (!DRE)
12471         continue;
12472       NeedToCheckForLPCs.insert(DRE->getDecl());
12473     }
12474   }
12475   for (const Decl *VD : NeedToCheckForLPCs) {
12476     for (const LastprivateConditionalData &Data :
12477          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12478       if (Data.DeclToUniqueName.count(VD) > 0) {
12479         if (!Data.Disabled)
12480           NeedToAddForLPCsAsDisabled.insert(VD);
12481         break;
12482       }
12483     }
12484   }
12485 }
12486 
12487 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12488     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12489     : CGM(CGF.CGM),
12490       Action((CGM.getLangOpts().OpenMP >= 50 &&
12491               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12492                            [](const OMPLastprivateClause *C) {
12493                              return C->getKind() ==
12494                                     OMPC_LASTPRIVATE_conditional;
12495                            }))
12496                  ? ActionToDo::PushAsLastprivateConditional
12497                  : ActionToDo::DoNotPush) {
12498   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12499   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12500     return;
12501   assert(Action == ActionToDo::PushAsLastprivateConditional &&
12502          "Expected a push action.");
12503   LastprivateConditionalData &Data =
12504       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12505   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12506     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12507       continue;
12508 
12509     for (const Expr *Ref : C->varlists()) {
12510       Data.DeclToUniqueName.insert(std::make_pair(
12511           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12512           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12513     }
12514   }
12515   Data.IVLVal = IVLVal;
12516   Data.Fn = CGF.CurFn;
12517 }
12518 
12519 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12520     CodeGenFunction &CGF, const OMPExecutableDirective &S)
12521     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12522   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12523   if (CGM.getLangOpts().OpenMP < 50)
12524     return;
12525   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12526   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12527   if (!NeedToAddForLPCsAsDisabled.empty()) {
12528     Action = ActionToDo::DisableLastprivateConditional;
12529     LastprivateConditionalData &Data =
12530         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12531     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12532       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12533     Data.Fn = CGF.CurFn;
12534     Data.Disabled = true;
12535   }
12536 }
12537 
/// Creates an RAII object through the (CGF, S) constructor, i.e. the form
/// that disables lastprivate conditional analysis for the directive \p S.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  // The object is constructed directly in the return statement; its
  // destructor pops the stack entry the constructor may have pushed.
  return LastprivateConditionalRAII(CGF, S);
}
12543 
12544 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12545   if (CGM.getLangOpts().OpenMP < 50)
12546     return;
12547   if (Action == ActionToDo::DisableLastprivateConditional) {
12548     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12549            "Expected list of disabled private vars.");
12550     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12551   }
12552   if (Action == ActionToDo::PushAsLastprivateConditional) {
12553     assert(
12554         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12555         "Expected list of lastprivate conditional vars.");
12556     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12557   }
12558 }
12559 
12560 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12561                                                         const VarDecl *VD) {
12562   ASTContext &C = CGM.getContext();
12563   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
12564   if (I == LastprivateConditionalToTypes.end())
12565     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12566   QualType NewType;
12567   const FieldDecl *VDField;
12568   const FieldDecl *FiredField;
12569   LValue BaseLVal;
12570   auto VI = I->getSecond().find(VD);
12571   if (VI == I->getSecond().end()) {
12572     RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
12573     RD->startDefinition();
12574     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12575     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12576     RD->completeDefinition();
12577     NewType = C.getRecordType(RD);
12578     Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12579     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12580     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12581   } else {
12582     NewType = std::get<0>(VI->getSecond());
12583     VDField = std::get<1>(VI->getSecond());
12584     FiredField = std::get<2>(VI->getSecond());
12585     BaseLVal = std::get<3>(VI->getSecond());
12586   }
12587   LValue FiredLVal =
12588       CGF.EmitLValueForField(BaseLVal, FiredField);
12589   CGF.EmitStoreOfScalar(
12590       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12591       FiredLVal);
12592   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
12593 }
12594 
12595 namespace {
12596 /// Checks if the lastprivate conditional variable is referenced in LHS.
12597 class LastprivateConditionalRefChecker final
12598     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12599   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12600   const Expr *FoundE = nullptr;
12601   const Decl *FoundD = nullptr;
12602   StringRef UniqueDeclName;
12603   LValue IVLVal;
12604   llvm::Function *FoundFn = nullptr;
12605   SourceLocation Loc;
12606 
12607 public:
12608   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12609     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12610          llvm::reverse(LPM)) {
12611       auto It = D.DeclToUniqueName.find(E->getDecl());
12612       if (It == D.DeclToUniqueName.end())
12613         continue;
12614       if (D.Disabled)
12615         return false;
12616       FoundE = E;
12617       FoundD = E->getDecl()->getCanonicalDecl();
12618       UniqueDeclName = It->second;
12619       IVLVal = D.IVLVal;
12620       FoundFn = D.Fn;
12621       break;
12622     }
12623     return FoundE == E;
12624   }
12625   bool VisitMemberExpr(const MemberExpr *E) {
12626     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12627       return false;
12628     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12629          llvm::reverse(LPM)) {
12630       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12631       if (It == D.DeclToUniqueName.end())
12632         continue;
12633       if (D.Disabled)
12634         return false;
12635       FoundE = E;
12636       FoundD = E->getMemberDecl()->getCanonicalDecl();
12637       UniqueDeclName = It->second;
12638       IVLVal = D.IVLVal;
12639       FoundFn = D.Fn;
12640       break;
12641     }
12642     return FoundE == E;
12643   }
12644   bool VisitStmt(const Stmt *S) {
12645     for (const Stmt *Child : S->children()) {
12646       if (!Child)
12647         continue;
12648       if (const auto *E = dyn_cast<Expr>(Child))
12649         if (!E->isGLValue())
12650           continue;
12651       if (Visit(Child))
12652         return true;
12653     }
12654     return false;
12655   }
12656   explicit LastprivateConditionalRefChecker(
12657       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12658       : LPM(LPM) {}
12659   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12660   getFoundData() const {
12661     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12662   }
12663 };
12664 } // namespace
12665 
12666 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
12667                                                        LValue IVLVal,
12668                                                        StringRef UniqueDeclName,
12669                                                        LValue LVal,
12670                                                        SourceLocation Loc) {
12671   // Last updated loop counter for the lastprivate conditional var.
12672   // int<xx> last_iv = 0;
12673   llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
12674   llvm::Constant *LastIV =
12675       getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
12676   cast<llvm::GlobalVariable>(LastIV)->setAlignment(
12677       IVLVal.getAlignment().getAsAlign());
12678   LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
12679 
12680   // Last value of the lastprivate conditional.
12681   // decltype(priv_a) last_a;
12682   llvm::GlobalVariable *Last = getOrCreateInternalVariable(
12683       CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
12684   Last->setAlignment(LVal.getAlignment().getAsAlign());
12685   LValue LastLVal = CGF.MakeAddrLValue(
12686       Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());
12687 
12688   // Global loop counter. Required to handle inner parallel-for regions.
12689   // iv
12690   llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
12691 
12692   // #pragma omp critical(a)
12693   // if (last_iv <= iv) {
12694   //   last_iv = iv;
12695   //   last_a = priv_a;
12696   // }
12697   auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
12698                     Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
12699     Action.Enter(CGF);
12700     llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
12701     // (last_iv <= iv) ? Check if the variable is updated and store new
12702     // value in global var.
12703     llvm::Value *CmpRes;
12704     if (IVLVal.getType()->isSignedIntegerType()) {
12705       CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
12706     } else {
12707       assert(IVLVal.getType()->isUnsignedIntegerType() &&
12708              "Loop iteration variable must be integer.");
12709       CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
12710     }
12711     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
12712     llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
12713     CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
12714     // {
12715     CGF.EmitBlock(ThenBB);
12716 
12717     //   last_iv = iv;
12718     CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
12719 
12720     //   last_a = priv_a;
12721     switch (CGF.getEvaluationKind(LVal.getType())) {
12722     case TEK_Scalar: {
12723       llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
12724       CGF.EmitStoreOfScalar(PrivVal, LastLVal);
12725       break;
12726     }
12727     case TEK_Complex: {
12728       CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
12729       CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
12730       break;
12731     }
12732     case TEK_Aggregate:
12733       llvm_unreachable(
12734           "Aggregates are not supported in lastprivate conditional.");
12735     }
12736     // }
12737     CGF.EmitBranch(ExitBB);
12738     // There is no need to emit line number for unconditional branch.
12739     (void)ApplyDebugLocation::CreateEmpty(CGF);
12740     CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
12741   };
12742 
12743   if (CGM.getLangOpts().OpenMPSimd) {
12744     // Do not emit as a critical region as no parallel region could be emitted.
12745     RegionCodeGenTy ThenRCG(CodeGen);
12746     ThenRCG(CGF);
12747   } else {
12748     emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
12749   }
12750 }
12751 
12752 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12753                                                          const Expr *LHS) {
12754   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12755     return;
12756   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12757   if (!Checker.Visit(LHS))
12758     return;
12759   const Expr *FoundE;
12760   const Decl *FoundD;
12761   StringRef UniqueDeclName;
12762   LValue IVLVal;
12763   llvm::Function *FoundFn;
12764   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12765       Checker.getFoundData();
12766   if (FoundFn != CGF.CurFn) {
12767     // Special codegen for inner parallel regions.
12768     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12769     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12770     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12771            "Lastprivate conditional is not found in outer region.");
12772     QualType StructTy = std::get<0>(It->getSecond());
12773     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12774     LValue PrivLVal = CGF.EmitLValue(FoundE);
12775     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12776         PrivLVal.getAddress(CGF),
12777         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
12778         CGF.ConvertTypeForMem(StructTy));
12779     LValue BaseLVal =
12780         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12781     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12782     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12783                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12784                         FiredLVal, llvm::AtomicOrdering::Unordered,
12785                         /*IsVolatile=*/true, /*isInit=*/false);
12786     return;
12787   }
12788 
12789   // Private address of the lastprivate conditional in the current context.
12790   // priv_a
12791   LValue LVal = CGF.EmitLValue(FoundE);
12792   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12793                                    FoundE->getExprLoc());
12794 }
12795 
12796 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12797     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12798     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12799   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12800     return;
12801   auto Range = llvm::reverse(LastprivateConditionalStack);
12802   auto It = llvm::find_if(
12803       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12804   if (It == Range.end() || It->Fn != CGF.CurFn)
12805     return;
12806   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12807   assert(LPCI != LastprivateConditionalToTypes.end() &&
12808          "Lastprivates must be registered already.");
12809   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12810   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12811   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12812   for (const auto &Pair : It->DeclToUniqueName) {
12813     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12814     if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
12815       continue;
12816     auto I = LPCI->getSecond().find(Pair.first);
12817     assert(I != LPCI->getSecond().end() &&
12818            "Lastprivate must be rehistered already.");
12819     // bool Cmp = priv_a.Fired != 0;
12820     LValue BaseLVal = std::get<3>(I->getSecond());
12821     LValue FiredLVal =
12822         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12823     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12824     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12825     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12826     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12827     // if (Cmp) {
12828     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12829     CGF.EmitBlock(ThenBB);
12830     Address Addr = CGF.GetAddrOfLocalVar(VD);
12831     LValue LVal;
12832     if (VD->getType()->isReferenceType())
12833       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12834                                            AlignmentSource::Decl);
12835     else
12836       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12837                                 AlignmentSource::Decl);
12838     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12839                                      D.getBeginLoc());
12840     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12841     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12842     // }
12843   }
12844 }
12845 
12846 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12847     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12848     SourceLocation Loc) {
12849   if (CGF.getLangOpts().OpenMP < 50)
12850     return;
12851   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12852   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12853          "Unknown lastprivate conditional variable.");
12854   StringRef UniqueName = It->second;
12855   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12856   // The variable was not updated in the region - exit.
12857   if (!GV)
12858     return;
12859   LValue LPLVal = CGF.MakeAddrLValue(
12860       Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
12861       PrivLVal.getType().getNonReferenceType());
12862   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12863   CGF.EmitStoreOfScalar(Res, PrivLVal);
12864 }
12865 
// SIMD-only runtime: emitParallelOutlinedFunction is unsupported. The body is
// only an llvm_unreachable, so no caller may reach it.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12871 
// SIMD-only runtime: emitTeamsOutlinedFunction is unsupported. The body is
// only an llvm_unreachable, so no caller may reach it.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12877 
// SIMD-only runtime: emitTaskOutlinedFunction is unsupported. The body is
// only an llvm_unreachable, so no caller may reach it; NumberOfParts is never
// written.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12885 
// SIMD-only runtime: emitParallelCall is unsupported. The body is only an
// llvm_unreachable, so no caller may reach it.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12894 
// SIMD-only runtime: emitCriticalRegion is unsupported. The body is only an
// llvm_unreachable, so no caller may reach it.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12901 
// SIMD-only runtime: emitMasterRegion is unsupported. The body is only an
// llvm_unreachable, so no caller may reach it.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12907 
// SIMD-only runtime: emitMaskedRegion is unsupported. The body is only an
// llvm_unreachable, so no caller may reach it.
void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12914 
// SIMD-only runtime: emitTaskyieldCall is unsupported. The body is only an
// llvm_unreachable, so no caller may reach it.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12919 
// SIMD-only runtime: emitTaskgroupRegion is unsupported. The body is only an
// llvm_unreachable, so no caller may reach it.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12925 
// SIMD-only runtime: emitSingleRegion is unsupported. The body is only an
// llvm_unreachable, so no caller may reach it.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12933 
// SIMD-only runtime: emitOrderedRegion is unsupported. The body is only an
// llvm_unreachable, so no caller may reach it.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12940 
// SIMD-only runtime: emitBarrierCall is unsupported. The body is only an
// llvm_unreachable, so no caller may reach it.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12948 
// SIMD-only runtime: emitForDispatchInit is unsupported. The body is only an
// llvm_unreachable, so no caller may reach it.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12955 
// SIMD-only runtime: emitForStaticInit is unsupported. The body is only an
// llvm_unreachable, so no caller may reach it.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12961 
// SIMD-only runtime: emitDistributeStaticInit is unsupported. The body is
// only an llvm_unreachable, so no caller may reach it.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12967 
// SIMD-only runtime: emitForOrderedIterationEnd is unsupported. The body is
// only an llvm_unreachable, so no caller may reach it.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12974 
// SIMD-only runtime: emitForStaticFinish is unsupported. The body is only an
// llvm_unreachable, so no caller may reach it.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12980 
// SIMD-only runtime: emitForNext is unsupported. The body is only an
// llvm_unreachable, so no caller may reach it and no value is returned.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12988 
// SIMD-only runtime: emitNumThreadsClause is unsupported. The body is only an
// llvm_unreachable, so no caller may reach it.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12994 
// SIMD-only runtime: emitProcBindClause is unsupported. The body is only an
// llvm_unreachable, so no caller may reach it.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13000 
// SIMD-only runtime: getAddrOfThreadPrivate is unsupported. The body is only
// an llvm_unreachable, so no caller may reach it and no Address is returned.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13007 
// SIMD-only runtime: emitThreadPrivateVarDefinition is unsupported. The body
// is only an llvm_unreachable, so no caller may reach it.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13013 
// SIMD-only runtime: getAddrOfArtificialThreadPrivate is unsupported. The
// body is only an llvm_unreachable, so no caller may reach it.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13018 
// SIMD-only runtime: emitFlush is unsupported. The body is only an
// llvm_unreachable, so no caller may reach it.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13025 
// SIMD-only runtime: emitTaskCall is unsupported. The body is only an
// llvm_unreachable, so no caller may reach it.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13034 
// SIMD-only runtime: emitTaskLoopCall is unsupported. The body is only an
// llvm_unreachable, so no caller may reach it.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13041 
// Unlike the surrounding SIMD-only definitions, reductions are emitted here:
// the call is forwarded unchanged to CGOpenMPRuntime::emitReduction. Callers
// are expected to request a simple reduction (Options.SimpleReduction), which
// is checked by assertion only.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
13050 
// SIMD-only runtime: emitTaskReductionInit is unsupported. The body is only
// an llvm_unreachable, so no caller may reach it and no value is returned.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13056 
// SIMD-only runtime: emitTaskReductionFini is unsupported. The body is only
// an llvm_unreachable, so no caller may reach it.
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13062 
// SIMD-only runtime: emitTaskReductionFixups is unsupported. The body is only
// an llvm_unreachable, so no caller may reach it.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13069 
// SIMD-only runtime: getTaskReductionItem is unsupported. The body is only an
// llvm_unreachable, so no caller may reach it and no Address is returned.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13076 
// SIMD-only runtime: emitTaskwaitCall is unsupported. The body is only an
// llvm_unreachable, so no caller may reach it.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13082 
// SIMD-only runtime: emitCancellationPointCall is unsupported. The body is
// only an llvm_unreachable, so no caller may reach it.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13088 
// SIMD-only runtime: emitCancelCall is unsupported. The body is only an
// llvm_unreachable, so no caller may reach it.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13094 
// SIMD-only runtime: emitTargetOutlinedFunction is unsupported. The body is
// only an llvm_unreachable, so no caller may reach it; the OutlinedFn and
// OutlinedFnID reference parameters are never written.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13101 
// SIMD-only runtime: emitTargetCall is unsupported. The body is only an
// llvm_unreachable, so no caller may reach it; SizeEmitter is never invoked.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13111 
// SIMD-only runtime: emitTargetFunctions is unsupported. The body is only an
// llvm_unreachable, so no caller may reach it and no result is returned.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13115 
// SIMD-only runtime: emitTargetGlobalVariable is unsupported. The body is
// only an llvm_unreachable, so no caller may reach it.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13119 
// Unlike the neighbouring SIMD-only definitions this one is callable: it
// ignores GD, emits nothing and always returns false.
// NOTE(review): presumably false means "not handled by the OpenMP runtime" so
// the caller proceeds with regular emission - confirm against the base-class
// contract.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
13123 
// SIMD-only runtime: emitTeamsCall is unsupported. The body is only an
// llvm_unreachable, so no caller may reach it.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13131 
// SIMD-only runtime: emitNumTeamsClause is unsupported. The body is only an
// llvm_unreachable, so no caller may reach it.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13138 
// SIMD-only runtime: emitTargetDataCalls is unsupported. The body is only an
// llvm_unreachable, so no caller may reach it; Info is never modified.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13144 
// SIMD-only runtime: emitTargetDataStandAloneCall is unsupported. The body is
// only an llvm_unreachable, so no caller may reach it.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13150 
// SIMD-only runtime: emitDoacrossInit is unsupported. The body is only an
// llvm_unreachable, so no caller may reach it.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13156 
// SIMD-only runtime: emitDoacrossOrdered is unsupported. The body is only an
// llvm_unreachable, so no caller may reach it.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13161 
// SIMD-only runtime: translateParameter is unsupported. The body is only an
// llvm_unreachable, so no caller may reach it and no declaration is returned.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13167 
// SIMD-only runtime: getParameterAddress is unsupported. The body is only an
// llvm_unreachable, so no caller may reach it and no Address is returned.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13174