1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/APValue.h"
20 #include "clang/AST/Attr.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/AST/OpenMPClause.h"
23 #include "clang/AST/StmtOpenMP.h"
24 #include "clang/AST/StmtVisitor.h"
25 #include "clang/Basic/BitmaskEnum.h"
26 #include "clang/Basic/FileManager.h"
27 #include "clang/Basic/OpenMPKinds.h"
28 #include "clang/Basic/SourceManager.h"
29 #include "clang/CodeGen/ConstantInitBuilder.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/SetOperations.h"
32 #include "llvm/ADT/SmallBitVector.h"
33 #include "llvm/ADT/StringExtras.h"
34 #include "llvm/Bitcode/BitcodeReader.h"
35 #include "llvm/IR/Constants.h"
36 #include "llvm/IR/DerivedTypes.h"
37 #include "llvm/IR/GlobalValue.h"
38 #include "llvm/IR/InstrTypes.h"
39 #include "llvm/IR/Value.h"
40 #include "llvm/Support/AtomicOrdering.h"
41 #include "llvm/Support/Format.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include <cassert>
44 #include <numeric>
45 
46 using namespace clang;
47 using namespace CodeGen;
48 using namespace llvm::omp;
49 
50 namespace {
51 /// Base class for handling code generation inside OpenMP regions.
52 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
53 public:
54   /// Kinds of OpenMP regions used in codegen.
55   enum CGOpenMPRegionKind {
56     /// Region with outlined function for standalone 'parallel'
57     /// directive.
58     ParallelOutlinedRegion,
59     /// Region with outlined function for standalone 'task' directive.
60     TaskOutlinedRegion,
61     /// Region for constructs that do not require function outlining,
62     /// like 'for', 'sections', 'atomic' etc. directives.
63     InlinedRegion,
64     /// Region with outlined function for standalone 'target' directive.
65     TargetRegion,
66   };
67 
68   CGOpenMPRegionInfo(const CapturedStmt &CS,
69                      const CGOpenMPRegionKind RegionKind,
70                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
71                      bool HasCancel)
72       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
73         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
74 
75   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
76                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
77                      bool HasCancel)
78       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
79         Kind(Kind), HasCancel(HasCancel) {}
80 
81   /// Get a variable or parameter for storing global thread id
82   /// inside OpenMP construct.
83   virtual const VarDecl *getThreadIDVariable() const = 0;
84 
85   /// Emit the captured statement body.
86   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
87 
88   /// Get an LValue for the current ThreadID variable.
89   /// \return LValue for thread id variable. This LValue always has type int32*.
90   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
91 
92   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
93 
94   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
95 
96   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
97 
98   bool hasCancel() const { return HasCancel; }
99 
100   static bool classof(const CGCapturedStmtInfo *Info) {
101     return Info->getKind() == CR_OpenMP;
102   }
103 
104   ~CGOpenMPRegionInfo() override = default;
105 
106 protected:
107   CGOpenMPRegionKind RegionKind;
108   RegionCodeGenTy CodeGen;
109   OpenMPDirectiveKind Kind;
110   bool HasCancel;
111 };
112 
113 /// API for captured statement code generation in OpenMP constructs.
114 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
115 public:
116   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
117                              const RegionCodeGenTy &CodeGen,
118                              OpenMPDirectiveKind Kind, bool HasCancel,
119                              StringRef HelperName)
120       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
121                            HasCancel),
122         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
123     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
124   }
125 
126   /// Get a variable or parameter for storing global thread id
127   /// inside OpenMP construct.
128   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
129 
130   /// Get the name of the capture helper.
131   StringRef getHelperName() const override { return HelperName; }
132 
133   static bool classof(const CGCapturedStmtInfo *Info) {
134     return CGOpenMPRegionInfo::classof(Info) &&
135            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
136                ParallelOutlinedRegion;
137   }
138 
139 private:
140   /// A variable or parameter storing global thread id for OpenMP
141   /// constructs.
142   const VarDecl *ThreadIDVar;
143   StringRef HelperName;
144 };
145 
146 /// API for captured statement code generation in OpenMP constructs.
147 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
148 public:
149   class UntiedTaskActionTy final : public PrePostActionTy {
150     bool Untied;
151     const VarDecl *PartIDVar;
152     const RegionCodeGenTy UntiedCodeGen;
153     llvm::SwitchInst *UntiedSwitch = nullptr;
154 
155   public:
156     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
157                        const RegionCodeGenTy &UntiedCodeGen)
158         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
159     void Enter(CodeGenFunction &CGF) override {
160       if (Untied) {
161         // Emit task switching point.
162         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
163             CGF.GetAddrOfLocalVar(PartIDVar),
164             PartIDVar->getType()->castAs<PointerType>());
165         llvm::Value *Res =
166             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
167         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
168         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
169         CGF.EmitBlock(DoneBB);
170         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
171         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
172         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
173                               CGF.Builder.GetInsertBlock());
174         emitUntiedSwitch(CGF);
175       }
176     }
177     void emitUntiedSwitch(CodeGenFunction &CGF) const {
178       if (Untied) {
179         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
180             CGF.GetAddrOfLocalVar(PartIDVar),
181             PartIDVar->getType()->castAs<PointerType>());
182         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
183                               PartIdLVal);
184         UntiedCodeGen(CGF);
185         CodeGenFunction::JumpDest CurPoint =
186             CGF.getJumpDestInCurrentScope(".untied.next.");
187         CGF.EmitBranch(CGF.ReturnBlock.getBlock());
188         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
189         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
190                               CGF.Builder.GetInsertBlock());
191         CGF.EmitBranchThroughCleanup(CurPoint);
192         CGF.EmitBlock(CurPoint.getBlock());
193       }
194     }
195     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
196   };
197   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
198                                  const VarDecl *ThreadIDVar,
199                                  const RegionCodeGenTy &CodeGen,
200                                  OpenMPDirectiveKind Kind, bool HasCancel,
201                                  const UntiedTaskActionTy &Action)
202       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
203         ThreadIDVar(ThreadIDVar), Action(Action) {
204     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
205   }
206 
207   /// Get a variable or parameter for storing global thread id
208   /// inside OpenMP construct.
209   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
210 
211   /// Get an LValue for the current ThreadID variable.
212   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
213 
214   /// Get the name of the capture helper.
215   StringRef getHelperName() const override { return ".omp_outlined."; }
216 
217   void emitUntiedSwitch(CodeGenFunction &CGF) override {
218     Action.emitUntiedSwitch(CGF);
219   }
220 
221   static bool classof(const CGCapturedStmtInfo *Info) {
222     return CGOpenMPRegionInfo::classof(Info) &&
223            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
224                TaskOutlinedRegion;
225   }
226 
227 private:
228   /// A variable or parameter storing global thread id for OpenMP
229   /// constructs.
230   const VarDecl *ThreadIDVar;
231   /// Action for emitting code for untied tasks.
232   const UntiedTaskActionTy &Action;
233 };
234 
235 /// API for inlined captured statement code generation in OpenMP
236 /// constructs.
237 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
238 public:
239   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
240                             const RegionCodeGenTy &CodeGen,
241                             OpenMPDirectiveKind Kind, bool HasCancel)
242       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
243         OldCSI(OldCSI),
244         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
245 
246   // Retrieve the value of the context parameter.
247   llvm::Value *getContextValue() const override {
248     if (OuterRegionInfo)
249       return OuterRegionInfo->getContextValue();
250     llvm_unreachable("No context value for inlined OpenMP region");
251   }
252 
253   void setContextValue(llvm::Value *V) override {
254     if (OuterRegionInfo) {
255       OuterRegionInfo->setContextValue(V);
256       return;
257     }
258     llvm_unreachable("No context value for inlined OpenMP region");
259   }
260 
261   /// Lookup the captured field decl for a variable.
262   const FieldDecl *lookup(const VarDecl *VD) const override {
263     if (OuterRegionInfo)
264       return OuterRegionInfo->lookup(VD);
265     // If there is no outer outlined region,no need to lookup in a list of
266     // captured variables, we can use the original one.
267     return nullptr;
268   }
269 
270   FieldDecl *getThisFieldDecl() const override {
271     if (OuterRegionInfo)
272       return OuterRegionInfo->getThisFieldDecl();
273     return nullptr;
274   }
275 
276   /// Get a variable or parameter for storing global thread id
277   /// inside OpenMP construct.
278   const VarDecl *getThreadIDVariable() const override {
279     if (OuterRegionInfo)
280       return OuterRegionInfo->getThreadIDVariable();
281     return nullptr;
282   }
283 
284   /// Get an LValue for the current ThreadID variable.
285   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
286     if (OuterRegionInfo)
287       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
288     llvm_unreachable("No LValue for inlined OpenMP construct");
289   }
290 
291   /// Get the name of the capture helper.
292   StringRef getHelperName() const override {
293     if (auto *OuterRegionInfo = getOldCSI())
294       return OuterRegionInfo->getHelperName();
295     llvm_unreachable("No helper name for inlined OpenMP construct");
296   }
297 
298   void emitUntiedSwitch(CodeGenFunction &CGF) override {
299     if (OuterRegionInfo)
300       OuterRegionInfo->emitUntiedSwitch(CGF);
301   }
302 
303   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
304 
305   static bool classof(const CGCapturedStmtInfo *Info) {
306     return CGOpenMPRegionInfo::classof(Info) &&
307            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
308   }
309 
310   ~CGOpenMPInlinedRegionInfo() override = default;
311 
312 private:
313   /// CodeGen info about outer OpenMP region.
314   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
315   CGOpenMPRegionInfo *OuterRegionInfo;
316 };
317 
318 /// API for captured statement code generation in OpenMP target
319 /// constructs. For this captures, implicit parameters are used instead of the
320 /// captured fields. The name of the target region has to be unique in a given
321 /// application so it is provided by the client, because only the client has
322 /// the information to generate that.
323 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
324 public:
325   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
326                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
327       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
328                            /*HasCancel=*/false),
329         HelperName(HelperName) {}
330 
331   /// This is unused for target regions because each starts executing
332   /// with a single thread.
333   const VarDecl *getThreadIDVariable() const override { return nullptr; }
334 
335   /// Get the name of the capture helper.
336   StringRef getHelperName() const override { return HelperName; }
337 
338   static bool classof(const CGCapturedStmtInfo *Info) {
339     return CGOpenMPRegionInfo::classof(Info) &&
340            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
341   }
342 
343 private:
344   StringRef HelperName;
345 };
346 
347 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
348   llvm_unreachable("No codegen for expressions");
349 }
350 /// API for generation of expressions captured in a innermost OpenMP
351 /// region.
352 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
353 public:
354   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
355       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
356                                   OMPD_unknown,
357                                   /*HasCancel=*/false),
358         PrivScope(CGF) {
359     // Make sure the globals captured in the provided statement are local by
360     // using the privatization logic. We assume the same variable is not
361     // captured more than once.
362     for (const auto &C : CS.captures()) {
363       if (!C.capturesVariable() && !C.capturesVariableByCopy())
364         continue;
365 
366       const VarDecl *VD = C.getCapturedVar();
367       if (VD->isLocalVarDeclOrParm())
368         continue;
369 
370       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
371                       /*RefersToEnclosingVariableOrCapture=*/false,
372                       VD->getType().getNonReferenceType(), VK_LValue,
373                       C.getLocation());
374       PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
375     }
376     (void)PrivScope.Privatize();
377   }
378 
379   /// Lookup the captured field decl for a variable.
380   const FieldDecl *lookup(const VarDecl *VD) const override {
381     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
382       return FD;
383     return nullptr;
384   }
385 
386   /// Emit the captured statement body.
387   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
388     llvm_unreachable("No body for expressions");
389   }
390 
391   /// Get a variable or parameter for storing global thread id
392   /// inside OpenMP construct.
393   const VarDecl *getThreadIDVariable() const override {
394     llvm_unreachable("No thread id for expressions");
395   }
396 
397   /// Get the name of the capture helper.
398   StringRef getHelperName() const override {
399     llvm_unreachable("No helper name for expressions");
400   }
401 
402   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
403 
404 private:
405   /// Private scope to capture global variables.
406   CodeGenFunction::OMPPrivateScope PrivScope;
407 };
408 
409 /// RAII for emitting code of OpenMP constructs.
410 class InlinedOpenMPRegionRAII {
411   CodeGenFunction &CGF;
412   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
413   FieldDecl *LambdaThisCaptureField = nullptr;
414   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
415   bool NoInheritance = false;
416 
417 public:
418   /// Constructs region for combined constructs.
419   /// \param CodeGen Code generation sequence for combined directives. Includes
420   /// a list of functions used for code generation of implicitly inlined
421   /// regions.
422   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
423                           OpenMPDirectiveKind Kind, bool HasCancel,
424                           bool NoInheritance = true)
425       : CGF(CGF), NoInheritance(NoInheritance) {
426     // Start emission for the construct.
427     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
428         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
429     if (NoInheritance) {
430       std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
431       LambdaThisCaptureField = CGF.LambdaThisCaptureField;
432       CGF.LambdaThisCaptureField = nullptr;
433       BlockInfo = CGF.BlockInfo;
434       CGF.BlockInfo = nullptr;
435     }
436   }
437 
438   ~InlinedOpenMPRegionRAII() {
439     // Restore original CapturedStmtInfo only if we're done with code emission.
440     auto *OldCSI =
441         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
442     delete CGF.CapturedStmtInfo;
443     CGF.CapturedStmtInfo = OldCSI;
444     if (NoInheritance) {
445       std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
446       CGF.LambdaThisCaptureField = LambdaThisCaptureField;
447       CGF.BlockInfo = BlockInfo;
448     }
449   }
450 };
451 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerators are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// so the numeric values below are kept exactly as written.
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive. Has the same value as
  /// OMP_IDENT_BARRIER_IMPL.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive. Includes the
  /// OMP_IDENT_BARRIER_IMPL bit (0x40) plus 0x80.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive. Includes the
  /// OMP_IDENT_BARRIER_IMPL bit (0x40) plus 0x100.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  // The bitmask-enum marker is given the largest enumerator of this enum.
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
480 
// Nested anonymous namespace holding the offloading-related enums.
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
/// Apart from OMP_REQ_UNDEFINED (zero), each enumerator is a distinct single
/// bit.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  // The bitmask-enum marker is given the largest enumerator of this enum.
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Device ids with a reserved meaning.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
506 
/// Indices of the fields of the ident structure that describes a source
/// location. The enumerators follow the declaration order of the fields.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// reserved_1: might be used in Fortran.
  IdentField_Reserved_1 = 0,
  /// flags: OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags = 1,
  /// reserved_2: not really used in Fortran any more.
  IdentField_Reserved_2 = 2,
  /// reserved_3: source[4] in Fortran, do not use for C++.
  IdentField_Reserved_3 = 3,
  /// psource: string describing the source location. The string is composed
  /// of semi-colon separated fields which describe the source file, the
  /// function and a pair of line numbers that delimit the construct.
  IdentField_PSource = 4
};
547 
/// Schedule types for 'omp for' loops. The enumerators (and their values) are
/// taken from the enum sched_type in kmp.h.
enum OpenMPSchedType {
  // Default (unordered) schedules.
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// Default schedule; an alias of OMP_sch_static.
  OMP_sch_default = OMP_sch_static,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,

  // 'ordered' schedules.
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,

  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,

  // Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = 1 << 29,
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = 1 << 30,
};
579 
580 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
581 /// region.
582 class CleanupTy final : public EHScopeStack::Cleanup {
583   PrePostActionTy *Action;
584 
585 public:
586   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
587   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
588     if (!CGF.HaveInsertPoint())
589       return;
590     Action->Exit(CGF);
591   }
592 };
593 
594 } // anonymous namespace
595 
596 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
597   CodeGenFunction::RunCleanupsScope Scope(CGF);
598   if (PrePostAction) {
599     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
600     Callback(CodeGen, CGF, *PrePostAction);
601   } else {
602     PrePostActionTy Action;
603     Callback(CodeGen, CGF, Action);
604   }
605 }
606 
607 /// Check if the combiner is a call to UDR combiner and if it is so return the
608 /// UDR decl used for reduction.
609 static const OMPDeclareReductionDecl *
610 getReductionInit(const Expr *ReductionOp) {
611   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
612     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
613       if (const auto *DRE =
614               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
615         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
616           return DRD;
617   return nullptr;
618 }
619 
620 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
621                                              const OMPDeclareReductionDecl *DRD,
622                                              const Expr *InitOp,
623                                              Address Private, Address Original,
624                                              QualType Ty) {
625   if (DRD->getInitializer()) {
626     std::pair<llvm::Function *, llvm::Function *> Reduction =
627         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
628     const auto *CE = cast<CallExpr>(InitOp);
629     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
630     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
631     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
632     const auto *LHSDRE =
633         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
634     const auto *RHSDRE =
635         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
636     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
637     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
638     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
639     (void)PrivateScope.Privatize();
640     RValue Func = RValue::get(Reduction.second);
641     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
642     CGF.EmitIgnoredExpr(InitOp);
643   } else {
644     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
645     std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
646     auto *GV = new llvm::GlobalVariable(
647         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
648         llvm::GlobalValue::PrivateLinkage, Init, Name);
649     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
650     RValue InitRVal;
651     switch (CGF.getEvaluationKind(Ty)) {
652     case TEK_Scalar:
653       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
654       break;
655     case TEK_Complex:
656       InitRVal =
657           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
658       break;
659     case TEK_Aggregate: {
660       OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
661       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
662       CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
663                            /*IsInitializer=*/false);
664       return;
665     }
666     }
667     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
668     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
669     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
670                          /*IsInitializer=*/false);
671   }
672 }
673 
/// Emit element-by-element initialization of an array.
///
/// The emitted code is a loop over the elements of the destination array
/// that is skipped entirely when the array is empty. When \p DRD is non-null
/// the source array is walked in lockstep with the destination.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, each element is initialized via
/// emitInitWithReductionInitializer; otherwise \p Init is evaluated into each
/// element.
/// \param Init Initial expression of array.
/// \param DRD Declare reduction declaration, or null. The source array is
/// only touched when this is non-null.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  // Give the source the same element type as the destination.
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop altogether for an empty array.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  // EntryBB is remembered as the first incoming edge of the PHIs below.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI tracking the current source element (only with a declare reduction).
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  // PHI tracking the current destination element.
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Cleanups created while initializing one element are run inside the
    // loop body.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the source GEP carries the "dest.element" value name,
    // same as the destination GEP below - looks like a copy-paste; confirm
    // nothing matches on the IR name before renaming.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The back edge comes from whichever block the loop body ended in.
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
763 
764 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
765   return CGF.EmitOMPSharedLValue(E);
766 }
767 
768 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
769                                             const Expr *E) {
770   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
771     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
772   return LValue();
773 }
774 
775 void ReductionCodeGen::emitAggregateInitialization(
776     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
777     const OMPDeclareReductionDecl *DRD) {
778   // Emit VarDecl with copy init for arrays.
779   // Get the address of the original variable captured in current
780   // captured region.
781   const auto *PrivateVD =
782       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
783   bool EmitDeclareReductionInit =
784       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
785   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
786                        EmitDeclareReductionInit,
787                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
788                                                 : PrivateVD->getInit(),
789                        DRD, SharedAddr);
790 }
791 
792 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
793                                    ArrayRef<const Expr *> Origs,
794                                    ArrayRef<const Expr *> Privates,
795                                    ArrayRef<const Expr *> ReductionOps) {
796   ClausesData.reserve(Shareds.size());
797   SharedAddresses.reserve(Shareds.size());
798   Sizes.reserve(Shareds.size());
799   BaseDecls.reserve(Shareds.size());
800   const auto *IOrig = Origs.begin();
801   const auto *IPriv = Privates.begin();
802   const auto *IRed = ReductionOps.begin();
803   for (const Expr *Ref : Shareds) {
804     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
805     std::advance(IOrig, 1);
806     std::advance(IPriv, 1);
807     std::advance(IRed, 1);
808   }
809 }
810 
811 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
812   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
813          "Number of generated lvalues must be exactly N.");
814   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
815   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
816   SharedAddresses.emplace_back(First, Second);
817   if (ClausesData[N].Shared == ClausesData[N].Ref) {
818     OrigAddresses.emplace_back(First, Second);
819   } else {
820     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
821     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
822     OrigAddresses.emplace_back(First, Second);
823   }
824 }
825 
826 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
827   const auto *PrivateVD =
828       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
829   QualType PrivateType = PrivateVD->getType();
830   bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
831   if (!PrivateType->isVariablyModifiedType()) {
832     Sizes.emplace_back(
833         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
834         nullptr);
835     return;
836   }
837   llvm::Value *Size;
838   llvm::Value *SizeInChars;
839   auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
840   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
841   if (AsArraySection) {
842     Size = CGF.Builder.CreatePtrDiff(ElemType,
843                                      OrigAddresses[N].second.getPointer(CGF),
844                                      OrigAddresses[N].first.getPointer(CGF));
845     Size = CGF.Builder.CreateNUWAdd(
846         Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
847     SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
848   } else {
849     SizeInChars =
850         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
851     Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
852   }
853   Sizes.emplace_back(SizeInChars, Size);
854   CodeGenFunction::OpaqueValueMapping OpaqueMap(
855       CGF,
856       cast<OpaqueValueExpr>(
857           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
858       RValue::get(Size));
859   CGF.EmitVariablyModifiedType(PrivateType);
860 }
861 
862 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
863                                          llvm::Value *Size) {
864   const auto *PrivateVD =
865       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
866   QualType PrivateType = PrivateVD->getType();
867   if (!PrivateType->isVariablyModifiedType()) {
868     assert(!Size && !Sizes[N].second &&
869            "Size should be nullptr for non-variably modified reduction "
870            "items.");
871     return;
872   }
873   CodeGenFunction::OpaqueValueMapping OpaqueMap(
874       CGF,
875       cast<OpaqueValueExpr>(
876           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
877       RValue::get(Size));
878   CGF.EmitVariablyModifiedType(PrivateType);
879 }
880 
/// Emit the initialization of the private copy of reduction item \p N.
///
/// \param PrivateAddr Address of the private copy; it is re-typed to the
/// memory type of the private variable before it is used.
/// \param SharedAddr Address forwarded to the declare-reduction based
/// initialization helpers.
/// \param DefaultInit Callback invoked on every path that reaches one of the
/// DRD/default branches below. Its boolean result is only inspected in the
/// last branch, where a true result suppresses the explicit initializer.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array items: run the default initialization only when the
    // declare-reduction has an initializer, then initialize the aggregate.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Non-array item handled by the declare-reduction: the result of
    // DefaultInit is deliberately ignored.
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // NOTE: DefaultInit is called as part of the condition above, so it runs
    // whenever this branch is evaluated, even if the body is not entered.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
908 
909 bool ReductionCodeGen::needCleanups(unsigned N) {
910   const auto *PrivateVD =
911       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
912   QualType PrivateType = PrivateVD->getType();
913   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
914   return DTorKind != QualType::DK_none;
915 }
916 
917 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
918                                     Address PrivateAddr) {
919   const auto *PrivateVD =
920       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
921   QualType PrivateType = PrivateVD->getType();
922   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
923   if (needCleanups(N)) {
924     PrivateAddr = CGF.Builder.CreateElementBitCast(
925         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
926     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
927   }
928 }
929 
/// Starting from \p BaseLV, load through successive pointer/reference levels
/// of \p BaseTy until the type is neither a pointer nor a reference, or until
/// it is the same type as \p ElTy.
///
/// \returns An lvalue whose address is the final address bitcast to the memory
/// type of \p ElTy; the lvalue type, base info and TBAA info are taken from
/// the lvalue reached by the loop.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      // Pointer level: load the pointer and continue with the pointee lvalue.
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      // Reference level: view the current address as a reference lvalue and
      // load through it.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
949 
/// Wrap \p Addr in as many memory temporaries as there are pointer/reference
/// levels between \p BaseTy and \p ElTy (the same walk loadToBegin performs).
///
/// One temporary is created per level; each outer temporary stores the
/// pointer to the next inner one, and \p Addr (cast to the innermost
/// temporary's element type) is stored into the innermost temporary. The
/// outermost temporary is returned. If there are no levels to rebuild, \p Addr
/// is cast to \p BaseLVType and returned with alignment \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  // Tmp: most recently created temporary; TopTmp: the one created before it;
  // MostTopTmp: the first (outermost) temporary, i.e. the eventual result.
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  // Cast to the element type of the innermost temporary when one exists,
  // otherwise to the caller-provided base type.
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address::deprecated(Addr, BaseLVAlignment);
}
977 
978 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
979   const VarDecl *OrigVD = nullptr;
980   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
981     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
982     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
983       Base = TempOASE->getBase()->IgnoreParenImpCasts();
984     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
985       Base = TempASE->getBase()->IgnoreParenImpCasts();
986     DE = cast<DeclRefExpr>(Base);
987     OrigVD = cast<VarDecl>(DE->getDecl());
988   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
989     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
990     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
991       Base = TempASE->getBase()->IgnoreParenImpCasts();
992     DE = cast<DeclRefExpr>(Base);
993     OrigVD = cast<VarDecl>(DE->getDecl());
994   }
995   return OrigVD;
996 }
997 
998 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
999                                                Address PrivateAddr) {
1000   const DeclRefExpr *DE;
1001   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1002     BaseDecls.emplace_back(OrigVD);
1003     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1004     LValue BaseLValue =
1005         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1006                     OriginalBaseLValue);
1007     Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
1008     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1009         SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
1010         SharedAddr.getPointer());
1011     llvm::Value *PrivatePointer =
1012         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1013             PrivateAddr.getPointer(), SharedAddr.getType());
1014     llvm::Value *Ptr = CGF.Builder.CreateGEP(
1015         SharedAddr.getElementType(), PrivatePointer, Adjustment);
1016     return castToBase(CGF, OrigVD->getType(),
1017                       SharedAddresses[N].first.getType(),
1018                       OriginalBaseLValue.getAddress(CGF).getType(),
1019                       OriginalBaseLValue.getAlignment(), Ptr);
1020   }
1021   BaseDecls.emplace_back(
1022       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1023   return PrivateAddr;
1024 }
1025 
1026 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1027   const OMPDeclareReductionDecl *DRD =
1028       getReductionInit(ClausesData[N].ReductionOp);
1029   return DRD && DRD->getInitializer();
1030 }
1031 
1032 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1033   return CGF.EmitLoadOfPointerLValue(
1034       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1035       getThreadIDVariable()->getType()->castAs<PointerType>());
1036 }
1037 
1038 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
1039   if (!CGF.HaveInsertPoint())
1040     return;
1041   // 1.2.2 OpenMP Language Terminology
1042   // Structured block - An executable statement with a single entry at the
1043   // top and a single exit at the bottom.
1044   // The point of exit cannot be a branch out of the structured block.
1045   // longjmp() and throw() must not violate the entry/exit criteria.
1046   CGF.EHStack.pushTerminate();
1047   if (S)
1048     CGF.incrementProfileCounter(S);
1049   CodeGen(CGF);
1050   CGF.EHStack.popTerminate();
1051 }
1052 
1053 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1054     CodeGenFunction &CGF) {
1055   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1056                             getThreadIDVariable()->getType(),
1057                             AlignmentSource::Decl);
1058 }
1059 
1060 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1061                                        QualType FieldTy) {
1062   auto *Field = FieldDecl::Create(
1063       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1064       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1065       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1066   Field->setAccess(AS_public);
1067   DC->addDecl(Field);
1068   return Field;
1069 }
1070 
/// Construct the OpenMP runtime code generator for module \p CGM.
///
/// \param FirstSeparator Separator emitted before the first part of a name
/// built by getName().
/// \param Separator Separator emitted before every subsequent part.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // The critical name type is an array of eight 32-bit integers.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  // Runs after the builder has been initialized.
  loadOffloadInfoMetadata();
}
1081 
1082 void CGOpenMPRuntime::clear() {
1083   InternalVars.clear();
1084   // Clean non-target variable declarations possibly used only in debug info.
1085   for (const auto &Data : EmittedNonTargetVariables) {
1086     if (!Data.getValue().pointsToAliveValue())
1087       continue;
1088     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1089     if (!GV)
1090       continue;
1091     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1092       continue;
1093     GV->eraseFromParent();
1094   }
1095 }
1096 
1097 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1098   SmallString<128> Buffer;
1099   llvm::raw_svector_ostream OS(Buffer);
1100   StringRef Sep = FirstSeparator;
1101   for (StringRef Part : Parts) {
1102     OS << Sep << Part;
1103     Sep = Separator;
1104   }
1105   return std::string(OS.str());
1106 }
1107 
/// Emit the internal helper function implementing either the combiner or the
/// initializer of a user-defined reduction.
///
/// \param Ty Type the reduction operates on; both parameters of the helper
/// are restrict-qualified pointers to it.
/// \param CombinerInitializer Expression emitted (and its value ignored) as
/// the body of the helper; may be null.
/// \param In Variable mapped to the pointee of the second helper parameter.
/// \param Out Variable mapped to the pointee of the first helper parameter.
/// \param IsCombiner Selects the helper name ("omp_combiner" versus
/// "omp_initializer"); when false, a non-trivial initializer of \p Out is
/// emitted into it before \p CombinerInitializer.
/// \returns The newly created function.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  // Note the parameter order: the 'out' parameter is pushed first below.
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // When optimizing, make the helper always-inline instead of
  // noinline/optnone.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress(CGF));
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress(CGF));
  (void)Scope.Privatize();
  // Initializer helpers only: emit the non-trivial initializer of Out into
  // its (privatized) storage first.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1162 
1163 void CGOpenMPRuntime::emitUserDefinedReduction(
1164     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1165   if (UDRMap.count(D) > 0)
1166     return;
1167   llvm::Function *Combiner = emitCombinerOrInitializer(
1168       CGM, D->getType(), D->getCombiner(),
1169       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1170       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1171       /*IsCombiner=*/true);
1172   llvm::Function *Initializer = nullptr;
1173   if (const Expr *Init = D->getInitializer()) {
1174     Initializer = emitCombinerOrInitializer(
1175         CGM, D->getType(),
1176         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1177                                                                      : nullptr,
1178         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1179         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1180         /*IsCombiner=*/false);
1181   }
1182   UDRMap.try_emplace(D, Combiner, Initializer);
1183   if (CGF) {
1184     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1185     Decls.second.push_back(D);
1186   }
1187 }
1188 
1189 std::pair<llvm::Function *, llvm::Function *>
1190 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1191   auto I = UDRMap.find(D);
1192   if (I != UDRMap.end())
1193     return I->second;
1194   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1195   return UDRMap.lookup(D);
1196 }
1197 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
//
// The constructor pushes a finalization callback onto the builder and the
// destructor pops it again; both are no-ops when the builder pointer is null.
// The callback captures CGF by reference, so the CodeGenFunction must outlive
// this object.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of Clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will be no need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      // Emit the branch at IP and restore the previous insertion point when
      // the guard goes out of scope.
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    // Undo the push performed by the constructor.
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  // Builder the callback was pushed to; null disables both push and pop.
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1242 
1243 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1244     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1245     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1246     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1247   assert(ThreadIDVar->getType()->isPointerType() &&
1248          "thread id variable must be of type kmp_int32 *");
1249   CodeGenFunction CGF(CGM, true);
1250   bool HasCancel = false;
1251   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1252     HasCancel = OPD->hasCancel();
1253   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1254     HasCancel = OPD->hasCancel();
1255   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1256     HasCancel = OPSD->hasCancel();
1257   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1258     HasCancel = OPFD->hasCancel();
1259   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1260     HasCancel = OPFD->hasCancel();
1261   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1262     HasCancel = OPFD->hasCancel();
1263   else if (const auto *OPFD =
1264                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1265     HasCancel = OPFD->hasCancel();
1266   else if (const auto *OPFD =
1267                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1268     HasCancel = OPFD->hasCancel();
1269 
1270   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1271   //       parallel region to make cancellation barriers work properly.
1272   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1273   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1274   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1275                                     HasCancel, OutlinedHelperName);
1276   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1277   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1278 }
1279 
1280 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1281     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1282     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1283   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1284   return emitParallelOrTeamsOutlinedFunction(
1285       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1286 }
1287 
1288 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1289     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1290     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1291   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1292   return emitParallelOrTeamsOutlinedFunction(
1293       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1294 }
1295 
/// Emit the outlined function for the task or taskloop region of \p D.
///
/// \param ThreadIDVar Thread id variable; must NOT have pointer type here.
/// \param PartIDVar Part id variable handed to the untied task action.
/// \param TaskTVar Variable holding a pointer to the task descriptor; it is
/// loaded and passed to __kmpc_omp_task by the untied code generation.
/// \param InnermostKind Directive kind forwarded to the region info.
/// \param CodeGen Callback generating the region body; the untied task action
/// is attached to it.
/// \param Tied Whether the task is tied.
/// \param NumberOfParts Output; only written when \p Tied is false.
/// \returns The generated captured statement function.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Code generation callback used by the untied task action: emits a call to
  // __kmpc_omp_task(loc, thread id, task pointer).
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Taskloop based directives capture under OMPD_taskloop, everything else
  // under OMPD_task.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Only the directive kinds listed below are queried for a cancel region.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // The number of parts is queried from the action after the body has been
  // generated, and only reported for untied tasks.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1342 
1343 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1344                              const RecordDecl *RD, const CGRecordLayout &RL,
1345                              ArrayRef<llvm::Constant *> Data) {
1346   llvm::StructType *StructTy = RL.getLLVMType();
1347   unsigned PrevIdx = 0;
1348   ConstantInitBuilder CIBuilder(CGM);
1349   const auto *DI = Data.begin();
1350   for (const FieldDecl *FD : RD->fields()) {
1351     unsigned Idx = RL.getLLVMFieldNo(FD);
1352     // Fill the alignment.
1353     for (unsigned I = PrevIdx; I < Idx; ++I)
1354       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1355     PrevIdx = Idx + 1;
1356     Fields.add(*DI);
1357     ++DI;
1358   }
1359 }
1360 
1361 template <class... As>
1362 static llvm::GlobalVariable *
1363 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1364                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1365                    As &&... Args) {
1366   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1367   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1368   ConstantInitBuilder CIBuilder(CGM);
1369   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1370   buildStructValue(Fields, CGM, RD, RL, Data);
1371   return Fields.finishAndCreateGlobal(
1372       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1373       std::forward<As>(Args)...);
1374 }
1375 
1376 template <typename T>
1377 static void
1378 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1379                                          ArrayRef<llvm::Constant *> Data,
1380                                          T &Parent) {
1381   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1382   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1383   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1384   buildStructValue(Fields, CGM, RD, RL, Data);
1385   Fields.finishAndAddTo(Parent);
1386 }
1387 
1388 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1389                                              bool AtCurrentPoint) {
1390   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1391   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1392 
1393   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1394   if (AtCurrentPoint) {
1395     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1396         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1397   } else {
1398     Elem.second.ServiceInsertPt =
1399         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1400     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1401   }
1402 }
1403 
1404 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1405   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1406   if (Elem.second.ServiceInsertPt) {
1407     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1408     Elem.second.ServiceInsertPt = nullptr;
1409     Ptr->eraseFromParent();
1410   }
1411 }
1412 
1413 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1414                                                   SourceLocation Loc,
1415                                                   SmallString<128> &Buffer) {
1416   llvm::raw_svector_ostream OS(Buffer);
1417   // Build debug location
1418   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1419   OS << ";" << PLoc.getFilename() << ";";
1420   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1421     OS << FD->getQualifiedNameAsString();
1422   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1423   return OS.str();
1424 }
1425 
1426 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1427                                                  SourceLocation Loc,
1428                                                  unsigned Flags) {
1429   uint32_t SrcLocStrSize;
1430   llvm::Constant *SrcLocStr;
1431   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1432       Loc.isInvalid()) {
1433     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1434   } else {
1435     std::string FunctionName;
1436     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1437       FunctionName = FD->getQualifiedNameAsString();
1438     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1439     const char *FileName = PLoc.getFilename();
1440     unsigned Line = PLoc.getLine();
1441     unsigned Column = PLoc.getColumn();
1442     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1443                                                 Column, SrcLocStrSize);
1444   }
1445   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1446   return OMPBuilder.getOrCreateIdent(
1447       SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1448 }
1449 
1450 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1451                                           SourceLocation Loc) {
1452   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1453   // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
1454   // the clang invariants used below might be broken.
1455   if (CGM.getLangOpts().OpenMPIRBuilder) {
1456     SmallString<128> Buffer;
1457     OMPBuilder.updateToLocation(CGF.Builder.saveIP());
1458     uint32_t SrcLocStrSize;
1459     auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1460         getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
1461     return OMPBuilder.getOrCreateThreadID(
1462         OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
1463   }
1464 
1465   llvm::Value *ThreadID = nullptr;
1466   // Check whether we've already cached a load of the thread id in this
1467   // function.
1468   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1469   if (I != OpenMPLocThreadIDMap.end()) {
1470     ThreadID = I->second.ThreadID;
1471     if (ThreadID != nullptr)
1472       return ThreadID;
1473   }
1474   // If exceptions are enabled, do not use parameter to avoid possible crash.
1475   if (auto *OMPRegionInfo =
1476           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1477     if (OMPRegionInfo->getThreadIDVariable()) {
1478       // Check if this an outlined function with thread id passed as argument.
1479       LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1480       llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1481       if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1482           !CGF.getLangOpts().CXXExceptions ||
1483           CGF.Builder.GetInsertBlock() == TopBlock ||
1484           !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1485           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1486               TopBlock ||
1487           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1488               CGF.Builder.GetInsertBlock()) {
1489         ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1490         // If value loaded in entry block, cache it and use it everywhere in
1491         // function.
1492         if (CGF.Builder.GetInsertBlock() == TopBlock) {
1493           auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1494           Elem.second.ThreadID = ThreadID;
1495         }
1496         return ThreadID;
1497       }
1498     }
1499   }
1500 
1501   // This is not an outlined function region - need to call __kmpc_int32
1502   // kmpc_global_thread_num(ident_t *loc).
1503   // Generate thread id value and cache this value for use across the
1504   // function.
1505   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1506   if (!Elem.second.ServiceInsertPt)
1507     setLocThreadIdInsertPt(CGF);
1508   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1509   CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1510   llvm::CallInst *Call = CGF.Builder.CreateCall(
1511       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1512                                             OMPRTL___kmpc_global_thread_num),
1513       emitUpdateLocation(CGF, Loc));
1514   Call->setCallingConv(CGF.getRuntimeCC());
1515   Elem.second.ThreadID = Call;
1516   return Call;
1517 }
1518 
1519 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1520   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1521   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1522     clearLocThreadIdInsertPt(CGF);
1523     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1524   }
1525   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1526     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1527       UDRMap.erase(D);
1528     FunctionUDRMap.erase(CGF.CurFn);
1529   }
1530   auto I = FunctionUDMMap.find(CGF.CurFn);
1531   if (I != FunctionUDMMap.end()) {
1532     for(const auto *D : I->second)
1533       UDMMap.erase(D);
1534     FunctionUDMMap.erase(I);
1535   }
1536   LastprivateConditionalToTypes.erase(CGF.CurFn);
1537   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1538 }
1539 
1540 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1541   return OMPBuilder.IdentPtr;
1542 }
1543 
1544 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1545   if (!Kmpc_MicroTy) {
1546     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1547     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1548                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1549     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1550   }
1551   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1552 }
1553 
1554 llvm::FunctionCallee
1555 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1556                                              bool IsGPUDistribute) {
1557   assert((IVSize == 32 || IVSize == 64) &&
1558          "IV size is not compatible with the omp runtime");
1559   StringRef Name;
1560   if (IsGPUDistribute)
1561     Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1562                                     : "__kmpc_distribute_static_init_4u")
1563                         : (IVSigned ? "__kmpc_distribute_static_init_8"
1564                                     : "__kmpc_distribute_static_init_8u");
1565   else
1566     Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1567                                     : "__kmpc_for_static_init_4u")
1568                         : (IVSigned ? "__kmpc_for_static_init_8"
1569                                     : "__kmpc_for_static_init_8u");
1570 
1571   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1572   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1573   llvm::Type *TypeParams[] = {
1574     getIdentTyPointerTy(),                     // loc
1575     CGM.Int32Ty,                               // tid
1576     CGM.Int32Ty,                               // schedtype
1577     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1578     PtrTy,                                     // p_lower
1579     PtrTy,                                     // p_upper
1580     PtrTy,                                     // p_stride
1581     ITy,                                       // incr
1582     ITy                                        // chunk
1583   };
1584   auto *FnTy =
1585       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1586   return CGM.CreateRuntimeFunction(FnTy, Name);
1587 }
1588 
1589 llvm::FunctionCallee
1590 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1591   assert((IVSize == 32 || IVSize == 64) &&
1592          "IV size is not compatible with the omp runtime");
1593   StringRef Name =
1594       IVSize == 32
1595           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1596           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1597   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1598   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1599                                CGM.Int32Ty,           // tid
1600                                CGM.Int32Ty,           // schedtype
1601                                ITy,                   // lower
1602                                ITy,                   // upper
1603                                ITy,                   // stride
1604                                ITy                    // chunk
1605   };
1606   auto *FnTy =
1607       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1608   return CGM.CreateRuntimeFunction(FnTy, Name);
1609 }
1610 
1611 llvm::FunctionCallee
1612 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1613   assert((IVSize == 32 || IVSize == 64) &&
1614          "IV size is not compatible with the omp runtime");
1615   StringRef Name =
1616       IVSize == 32
1617           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1618           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1619   llvm::Type *TypeParams[] = {
1620       getIdentTyPointerTy(), // loc
1621       CGM.Int32Ty,           // tid
1622   };
1623   auto *FnTy =
1624       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1625   return CGM.CreateRuntimeFunction(FnTy, Name);
1626 }
1627 
1628 llvm::FunctionCallee
1629 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1630   assert((IVSize == 32 || IVSize == 64) &&
1631          "IV size is not compatible with the omp runtime");
1632   StringRef Name =
1633       IVSize == 32
1634           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1635           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1636   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1637   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1638   llvm::Type *TypeParams[] = {
1639     getIdentTyPointerTy(),                     // loc
1640     CGM.Int32Ty,                               // tid
1641     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1642     PtrTy,                                     // p_lower
1643     PtrTy,                                     // p_upper
1644     PtrTy                                      // p_stride
1645   };
1646   auto *FnTy =
1647       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1648   return CGM.CreateRuntimeFunction(FnTy, Name);
1649 }
1650 
1651 /// Obtain information that uniquely identifies a target entry. This
1652 /// consists of the file and device IDs as well as line number associated with
1653 /// the relevant entry source location.
1654 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1655                                      unsigned &DeviceID, unsigned &FileID,
1656                                      unsigned &LineNum) {
1657   SourceManager &SM = C.getSourceManager();
1658 
1659   // The loc should be always valid and have a file ID (the user cannot use
1660   // #pragma directives in macros)
1661 
1662   assert(Loc.isValid() && "Source location is expected to be always valid.");
1663 
1664   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1665   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1666 
1667   llvm::sys::fs::UniqueID ID;
1668   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1669     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1670     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1671     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1672       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1673           << PLoc.getFilename() << EC.message();
1674   }
1675 
1676   DeviceID = ID.getDevice();
1677   FileID = ID.getFile();
1678   LineNum = PLoc.getLine();
1679 }
1680 
1681 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1682   if (CGM.getLangOpts().OpenMPSimd)
1683     return Address::invalid();
1684   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1685       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1686   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
1687               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1688                HasRequiresUnifiedSharedMemory))) {
1689     SmallString<64> PtrName;
1690     {
1691       llvm::raw_svector_ostream OS(PtrName);
1692       OS << CGM.getMangledName(GlobalDecl(VD));
1693       if (!VD->isExternallyVisible()) {
1694         unsigned DeviceID, FileID, Line;
1695         getTargetEntryUniqueInfo(CGM.getContext(),
1696                                  VD->getCanonicalDecl()->getBeginLoc(),
1697                                  DeviceID, FileID, Line);
1698         OS << llvm::format("_%x", FileID);
1699       }
1700       OS << "_decl_tgt_ref_ptr";
1701     }
1702     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
1703     if (!Ptr) {
1704       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
1705       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
1706                                         PtrName);
1707 
1708       auto *GV = cast<llvm::GlobalVariable>(Ptr);
1709       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
1710 
1711       if (!CGM.getLangOpts().OpenMPIsDevice)
1712         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
1713       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
1714     }
1715     return Address::deprecated(Ptr, CGM.getContext().getDeclAlign(VD));
1716   }
1717   return Address::invalid();
1718 }
1719 
1720 llvm::Constant *
1721 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1722   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1723          !CGM.getContext().getTargetInfo().isTLSSupported());
1724   // Lookup the entry, lazily creating it if necessary.
1725   std::string Suffix = getName({"cache", ""});
1726   return getOrCreateInternalVariable(
1727       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1728 }
1729 
1730 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1731                                                 const VarDecl *VD,
1732                                                 Address VDAddr,
1733                                                 SourceLocation Loc) {
1734   if (CGM.getLangOpts().OpenMPUseTLS &&
1735       CGM.getContext().getTargetInfo().isTLSSupported())
1736     return VDAddr;
1737 
1738   llvm::Type *VarTy = VDAddr.getElementType();
1739   llvm::Value *Args[] = {
1740       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1741       CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
1742       CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1743       getOrCreateThreadPrivateCache(VD)};
1744   return Address::deprecated(
1745       CGF.EmitRuntimeCall(
1746           OMPBuilder.getOrCreateRuntimeFunction(
1747               CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1748           Args),
1749       VDAddr.getAlignment());
1750 }
1751 
1752 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1753     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1754     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1755   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1756   // library.
1757   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1758   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1759                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1760                       OMPLoc);
1761   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1762   // to register constructor/destructor for variable.
1763   llvm::Value *Args[] = {
1764       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1765       Ctor, CopyCtor, Dtor};
1766   CGF.EmitRuntimeCall(
1767       OMPBuilder.getOrCreateRuntimeFunction(
1768           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1769       Args);
1770 }
1771 
/// Emits the constructor/destructor helper functions for the threadprivate
/// variable \p VD and registers them with the OpenMP runtime.
///
/// Nothing is emitted when the TLS-based implementation is in use, when \p VD
/// has no definition, when the definition was already processed (tracked by
/// mangled name in ThreadPrivateWithDefinition), or when neither a
/// constructor nor a destructor helper is needed.
///
/// \param VD the threadprivate variable; its definition is looked up here.
/// \param VDAddr address of the original variable; it is passed to the
/// registration call and its alignment is reused for the per-thread copy
/// handed to the helpers.
/// \param Loc location used for the generated functions and runtime calls.
/// \param PerformInit when true (and compiling C++), a constructor helper
/// that re-emits the initializer is generated.
/// \param CGF when non-null, the registration calls are emitted into this
/// function; when null, a separate "__omp_threadprivate_init_" function is
/// created to hold them.
/// \returns the separately created init function, or nullptr in every other
/// case.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // With native TLS no runtime registration is needed.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Process each defined variable only once.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    // NOTE(review): Init is dereferenced below without a null check;
    // presumably callers only pass PerformInit=true when VD has an
    // initializer - confirm.
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // Single void* parameter: the destination to initialize.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address::deprecated(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Reload the destination pointer and return it from the helper.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // Single void* parameter: the object to destroy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address::deprecated(ArgVal, VDAddr.getAlignment()),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // A helper that was not generated is passed as a typed null pointer.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No function to emit into: wrap the registration calls in a separate
      // nullary function and hand it back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration calls directly into the given function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1891 
/// Emits and registers the offload entries for the constructor and destructor
/// of the declare target variable \p VD.
///
/// When compiling for the device, real "<prefix>_ctor"/"<prefix>_dtor"
/// functions operating on \p Addr are generated; otherwise private i8
/// placeholder globals with the same names are created. Either way the entry
/// is registered with OffloadEntriesInfoManager.
///
/// \param VD the declare target variable; its definition is looked up here.
/// \param Addr the global variable the generated functions operate on.
/// \param PerformInit when true (and compiling C++), the constructor entry is
/// emitted.
/// \returns false when there are no offload target triples and this is not a
/// device compilation; otherwise the value of LangOpts.OpenMPIsDevice.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do when offloading is not in effect at all.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Skip variables that are not declare target, are mapped as 'link', or are
  // mapped as 'to' under the unified shared memory requirement.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Process each defined variable only once.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  // NOTE(review): Init is dereferenced in the device branch below without a
  // null check; presumably callers only pass PerformInit=true when VD has an
  // initializer - confirm.
  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the storage of the declare target variable VD at Addr.
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      // Use the address in address space 0 when the global lives elsewhere.
      llvm::Constant *AddrInAS0 = Addr;
      if (Addr->getAddressSpace() != 0)
        AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
            Addr, llvm::PointerType::getWithSamePointeeType(
                      cast<llvm::PointerType>(Addr->getType()), 0));
      CtorCGF.EmitAnyExprToMem(
          Init,
          Address::deprecated(AddrInAS0, CGM.getContext().getDeclAlign(VD)),
          Init->getType().getQualifiers(),
          /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Not a device compilation: only a private i8 placeholder with the
      // entry's name is created.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the declare target
      // variable VD stored at Addr.
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      // Use the address in address space 0 when the global lives elsewhere.
      llvm::Constant *AddrInAS0 = Addr;
      if (Addr->getAddressSpace() != 0)
        AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
            Addr, llvm::PointerType::getWithSamePointeeType(
                      cast<llvm::PointerType>(Addr->getType()), 0));
      DtorCGF.emitDestroy(
          Address::deprecated(AddrInAS0, CGM.getContext().getDeclAlign(VD)),
          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Not a device compilation: only a private i8 placeholder with the
      // entry's name is created.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2018 
2019 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2020                                                           QualType VarType,
2021                                                           StringRef Name) {
2022   std::string Suffix = getName({"artificial", ""});
2023   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2024   llvm::GlobalVariable *GAddr =
2025       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2026   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
2027       CGM.getTarget().isTLSSupported()) {
2028     GAddr->setThreadLocal(/*Val=*/true);
2029     return Address(GAddr, GAddr->getValueType(),
2030                    CGM.getContext().getTypeAlignInChars(VarType));
2031   }
2032   std::string CacheSuffix = getName({"cache", ""});
2033   llvm::Value *Args[] = {
2034       emitUpdateLocation(CGF, SourceLocation()),
2035       getThreadID(CGF, SourceLocation()),
2036       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2037       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2038                                 /*isSigned=*/false),
2039       getOrCreateInternalVariable(
2040           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2041   return Address(
2042       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2043           CGF.EmitRuntimeCall(
2044               OMPBuilder.getOrCreateRuntimeFunction(
2045                   CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
2046               Args),
2047           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2048       VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
2049 }
2050 
2051 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2052                                    const RegionCodeGenTy &ThenGen,
2053                                    const RegionCodeGenTy &ElseGen) {
2054   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2055 
2056   // If the condition constant folds and can be elided, try to avoid emitting
2057   // the condition and the dead arm of the if/else.
2058   bool CondConstant;
2059   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2060     if (CondConstant)
2061       ThenGen(CGF);
2062     else
2063       ElseGen(CGF);
2064     return;
2065   }
2066 
2067   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2068   // emit the conditional branch.
2069   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2070   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2071   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2072   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2073 
2074   // Emit the 'then' code.
2075   CGF.EmitBlock(ThenBlock);
2076   ThenGen(CGF);
2077   CGF.EmitBranch(ContBlock);
2078   // Emit the 'else' code if present.
2079   // There is no need to emit line number for unconditional branch.
2080   (void)ApplyDebugLocation::CreateEmpty(CGF);
2081   CGF.EmitBlock(ElseBlock);
2082   ElseGen(CGF);
2083   // There is no need to emit line number for unconditional branch.
2084   (void)ApplyDebugLocation::CreateEmpty(CGF);
2085   CGF.EmitBranch(ContBlock);
2086   // Emit the continuation block for code after the if.
2087   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2088 }
2089 
2090 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2091                                        llvm::Function *OutlinedFn,
2092                                        ArrayRef<llvm::Value *> CapturedVars,
2093                                        const Expr *IfCond,
2094                                        llvm::Value *NumThreads) {
2095   if (!CGF.HaveInsertPoint())
2096     return;
2097   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2098   auto &M = CGM.getModule();
2099   auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2100                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2101     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2102     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2103     llvm::Value *Args[] = {
2104         RTLoc,
2105         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2106         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2107     llvm::SmallVector<llvm::Value *, 16> RealArgs;
2108     RealArgs.append(std::begin(Args), std::end(Args));
2109     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2110 
2111     llvm::FunctionCallee RTLFn =
2112         OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2113     CGF.EmitRuntimeCall(RTLFn, RealArgs);
2114   };
2115   auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2116                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2117     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2118     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2119     // Build calls:
2120     // __kmpc_serialized_parallel(&Loc, GTid);
2121     llvm::Value *Args[] = {RTLoc, ThreadID};
2122     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2123                             M, OMPRTL___kmpc_serialized_parallel),
2124                         Args);
2125 
2126     // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2127     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2128     Address ZeroAddrBound =
2129         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2130                                          /*Name=*/".bound.zero.addr");
2131     CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
2132     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2133     // ThreadId for serialized parallels is 0.
2134     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2135     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2136     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2137 
2138     // Ensure we do not inline the function. This is trivially true for the ones
2139     // passed to __kmpc_fork_call but the ones called in serialized regions
2140     // could be inlined. This is not a perfect but it is closer to the invariant
2141     // we want, namely, every data environment starts with a new function.
2142     // TODO: We should pass the if condition to the runtime function and do the
2143     //       handling there. Much cleaner code.
2144     OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
2145     OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
2146     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2147 
2148     // __kmpc_end_serialized_parallel(&Loc, GTid);
2149     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2150     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2151                             M, OMPRTL___kmpc_end_serialized_parallel),
2152                         EndArgs);
2153   };
2154   if (IfCond) {
2155     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2156   } else {
2157     RegionCodeGenTy ThenRCG(ThenGen);
2158     ThenRCG(CGF);
2159   }
2160 }
2161 
2162 // If we're inside an (outlined) parallel region, use the region info's
2163 // thread-ID variable (it is passed in a first argument of the outlined function
2164 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2165 // regular serial code region, get thread ID by calling kmp_int32
2166 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2167 // return the address of that temp.
2168 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2169                                              SourceLocation Loc) {
2170   if (auto *OMPRegionInfo =
2171           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2172     if (OMPRegionInfo->getThreadIDVariable())
2173       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2174 
2175   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2176   QualType Int32Ty =
2177       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2178   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2179   CGF.EmitStoreOfScalar(ThreadID,
2180                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2181 
2182   return ThreadIDTemp;
2183 }
2184 
2185 llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable(
2186     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2187   SmallString<256> Buffer;
2188   llvm::raw_svector_ostream Out(Buffer);
2189   Out << Name;
2190   StringRef RuntimeName = Out.str();
2191   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2192   if (Elem.second) {
2193     assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) &&
2194            "OMP internal variable has different type than requested");
2195     return &*Elem.second;
2196   }
2197 
2198   return Elem.second = new llvm::GlobalVariable(
2199              CGM.getModule(), Ty, /*IsConstant*/ false,
2200              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2201              Elem.first(), /*InsertBefore=*/nullptr,
2202              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2203 }
2204 
2205 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2206   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2207   std::string Name = getName({Prefix, "var"});
2208   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2209 }
2210 
2211 namespace {
2212 /// Common pre(post)-action for different OpenMP constructs.
2213 class CommonActionTy final : public PrePostActionTy {
2214   llvm::FunctionCallee EnterCallee;
2215   ArrayRef<llvm::Value *> EnterArgs;
2216   llvm::FunctionCallee ExitCallee;
2217   ArrayRef<llvm::Value *> ExitArgs;
2218   bool Conditional;
2219   llvm::BasicBlock *ContBlock = nullptr;
2220 
2221 public:
2222   CommonActionTy(llvm::FunctionCallee EnterCallee,
2223                  ArrayRef<llvm::Value *> EnterArgs,
2224                  llvm::FunctionCallee ExitCallee,
2225                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2226       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2227         ExitArgs(ExitArgs), Conditional(Conditional) {}
2228   void Enter(CodeGenFunction &CGF) override {
2229     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2230     if (Conditional) {
2231       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2232       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2233       ContBlock = CGF.createBasicBlock("omp_if.end");
2234       // Generate the branch (If-stmt)
2235       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2236       CGF.EmitBlock(ThenBlock);
2237     }
2238   }
2239   void Done(CodeGenFunction &CGF) {
2240     // Emit the rest of blocks/branches
2241     CGF.EmitBranch(ContBlock);
2242     CGF.EmitBlock(ContBlock, true);
2243   }
2244   void Exit(CodeGenFunction &CGF) override {
2245     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2246   }
2247 };
2248 } // anonymous namespace
2249 
2250 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2251                                          StringRef CriticalName,
2252                                          const RegionCodeGenTy &CriticalOpGen,
2253                                          SourceLocation Loc, const Expr *Hint) {
2254   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2255   // CriticalOpGen();
2256   // __kmpc_end_critical(ident_t *, gtid, Lock);
2257   // Prepare arguments and build a call to __kmpc_critical
2258   if (!CGF.HaveInsertPoint())
2259     return;
2260   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2261                          getCriticalRegionLock(CriticalName)};
2262   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2263                                                 std::end(Args));
2264   if (Hint) {
2265     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2266         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2267   }
2268   CommonActionTy Action(
2269       OMPBuilder.getOrCreateRuntimeFunction(
2270           CGM.getModule(),
2271           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2272       EnterArgs,
2273       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2274                                             OMPRTL___kmpc_end_critical),
2275       Args);
2276   CriticalOpGen.setAction(Action);
2277   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2278 }
2279 
2280 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2281                                        const RegionCodeGenTy &MasterOpGen,
2282                                        SourceLocation Loc) {
2283   if (!CGF.HaveInsertPoint())
2284     return;
2285   // if(__kmpc_master(ident_t *, gtid)) {
2286   //   MasterOpGen();
2287   //   __kmpc_end_master(ident_t *, gtid);
2288   // }
2289   // Prepare arguments and build a call to __kmpc_master
2290   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2291   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2292                             CGM.getModule(), OMPRTL___kmpc_master),
2293                         Args,
2294                         OMPBuilder.getOrCreateRuntimeFunction(
2295                             CGM.getModule(), OMPRTL___kmpc_end_master),
2296                         Args,
2297                         /*Conditional=*/true);
2298   MasterOpGen.setAction(Action);
2299   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2300   Action.Done(CGF);
2301 }
2302 
2303 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2304                                        const RegionCodeGenTy &MaskedOpGen,
2305                                        SourceLocation Loc, const Expr *Filter) {
2306   if (!CGF.HaveInsertPoint())
2307     return;
2308   // if(__kmpc_masked(ident_t *, gtid, filter)) {
2309   //   MaskedOpGen();
2310   //   __kmpc_end_masked(iden_t *, gtid);
2311   // }
2312   // Prepare arguments and build a call to __kmpc_masked
2313   llvm::Value *FilterVal = Filter
2314                                ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2315                                : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2316   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2317                          FilterVal};
2318   llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2319                             getThreadID(CGF, Loc)};
2320   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2321                             CGM.getModule(), OMPRTL___kmpc_masked),
2322                         Args,
2323                         OMPBuilder.getOrCreateRuntimeFunction(
2324                             CGM.getModule(), OMPRTL___kmpc_end_masked),
2325                         ArgsEnd,
2326                         /*Conditional=*/true);
2327   MaskedOpGen.setAction(Action);
2328   emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2329   Action.Done(CGF);
2330 }
2331 
2332 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2333                                         SourceLocation Loc) {
2334   if (!CGF.HaveInsertPoint())
2335     return;
2336   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2337     OMPBuilder.createTaskyield(CGF.Builder);
2338   } else {
2339     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2340     llvm::Value *Args[] = {
2341         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2342         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2343     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2344                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2345                         Args);
2346   }
2347 
2348   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2349     Region->emitUntiedSwitch(CGF);
2350 }
2351 
2352 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2353                                           const RegionCodeGenTy &TaskgroupOpGen,
2354                                           SourceLocation Loc) {
2355   if (!CGF.HaveInsertPoint())
2356     return;
2357   // __kmpc_taskgroup(ident_t *, gtid);
2358   // TaskgroupOpGen();
2359   // __kmpc_end_taskgroup(ident_t *, gtid);
2360   // Prepare arguments and build a call to __kmpc_taskgroup
2361   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2362   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2363                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2364                         Args,
2365                         OMPBuilder.getOrCreateRuntimeFunction(
2366                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2367                         Args);
2368   TaskgroupOpGen.setAction(Action);
2369   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2370 }
2371 
2372 /// Given an array of pointers to variables, project the address of a
2373 /// given variable.
2374 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2375                                       unsigned Index, const VarDecl *Var) {
2376   // Pull out the pointer to the variable.
2377   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2378   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2379 
2380   Address Addr = Address::deprecated(Ptr, CGF.getContext().getDeclAlign(Var));
2381   Addr = CGF.Builder.CreateElementBitCast(
2382       Addr, CGF.ConvertTypeForMem(Var->getType()));
2383   return Addr;
2384 }
2385 
2386 static llvm::Value *emitCopyprivateCopyFunction(
2387     CodeGenModule &CGM, llvm::Type *ArgsType,
2388     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2389     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2390     SourceLocation Loc) {
2391   ASTContext &C = CGM.getContext();
2392   // void copy_func(void *LHSArg, void *RHSArg);
2393   FunctionArgList Args;
2394   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2395                            ImplicitParamDecl::Other);
2396   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2397                            ImplicitParamDecl::Other);
2398   Args.push_back(&LHSArg);
2399   Args.push_back(&RHSArg);
2400   const auto &CGFI =
2401       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2402   std::string Name =
2403       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2404   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2405                                     llvm::GlobalValue::InternalLinkage, Name,
2406                                     &CGM.getModule());
2407   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2408   Fn->setDoesNotRecurse();
2409   CodeGenFunction CGF(CGM);
2410   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2411   // Dest = (void*[n])(LHSArg);
2412   // Src = (void*[n])(RHSArg);
2413   Address LHS = Address::deprecated(
2414       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2415           CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), ArgsType),
2416       CGF.getPointerAlign());
2417   Address RHS = Address::deprecated(
2418       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2419           CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), ArgsType),
2420       CGF.getPointerAlign());
2421   // *(Type0*)Dst[0] = *(Type0*)Src[0];
2422   // *(Type1*)Dst[1] = *(Type1*)Src[1];
2423   // ...
2424   // *(Typen*)Dst[n] = *(Typen*)Src[n];
2425   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2426     const auto *DestVar =
2427         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2428     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2429 
2430     const auto *SrcVar =
2431         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2432     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2433 
2434     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2435     QualType Type = VD->getType();
2436     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2437   }
2438   CGF.FinishFunction();
2439   return Fn;
2440 }
2441 
2442 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2443                                        const RegionCodeGenTy &SingleOpGen,
2444                                        SourceLocation Loc,
2445                                        ArrayRef<const Expr *> CopyprivateVars,
2446                                        ArrayRef<const Expr *> SrcExprs,
2447                                        ArrayRef<const Expr *> DstExprs,
2448                                        ArrayRef<const Expr *> AssignmentOps) {
2449   if (!CGF.HaveInsertPoint())
2450     return;
2451   assert(CopyprivateVars.size() == SrcExprs.size() &&
2452          CopyprivateVars.size() == DstExprs.size() &&
2453          CopyprivateVars.size() == AssignmentOps.size());
2454   ASTContext &C = CGM.getContext();
2455   // int32 did_it = 0;
2456   // if(__kmpc_single(ident_t *, gtid)) {
2457   //   SingleOpGen();
2458   //   __kmpc_end_single(ident_t *, gtid);
2459   //   did_it = 1;
2460   // }
2461   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2462   // <copy_func>, did_it);
2463 
2464   Address DidIt = Address::invalid();
2465   if (!CopyprivateVars.empty()) {
2466     // int32 did_it = 0;
2467     QualType KmpInt32Ty =
2468         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2469     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2470     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2471   }
2472   // Prepare arguments and build a call to __kmpc_single
2473   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2474   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2475                             CGM.getModule(), OMPRTL___kmpc_single),
2476                         Args,
2477                         OMPBuilder.getOrCreateRuntimeFunction(
2478                             CGM.getModule(), OMPRTL___kmpc_end_single),
2479                         Args,
2480                         /*Conditional=*/true);
2481   SingleOpGen.setAction(Action);
2482   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2483   if (DidIt.isValid()) {
2484     // did_it = 1;
2485     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2486   }
2487   Action.Done(CGF);
2488   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2489   // <copy_func>, did_it);
2490   if (DidIt.isValid()) {
2491     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2492     QualType CopyprivateArrayTy = C.getConstantArrayType(
2493         C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2494         /*IndexTypeQuals=*/0);
2495     // Create a list of all private variables for copyprivate.
2496     Address CopyprivateList =
2497         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2498     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2499       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2500       CGF.Builder.CreateStore(
2501           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2502               CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2503               CGF.VoidPtrTy),
2504           Elem);
2505     }
2506     // Build function that copies private values from single region to all other
2507     // threads in the corresponding parallel region.
2508     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2509         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
2510         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
2511     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2512     Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2513         CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2514     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2515     llvm::Value *Args[] = {
2516         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2517         getThreadID(CGF, Loc),        // i32 <gtid>
2518         BufSize,                      // size_t <buf_size>
2519         CL.getPointer(),              // void *<copyprivate list>
2520         CpyFn,                        // void (*) (void *, void *) <copy_func>
2521         DidItVal                      // i32 did_it
2522     };
2523     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2524                             CGM.getModule(), OMPRTL___kmpc_copyprivate),
2525                         Args);
2526   }
2527 }
2528 
2529 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2530                                         const RegionCodeGenTy &OrderedOpGen,
2531                                         SourceLocation Loc, bool IsThreads) {
2532   if (!CGF.HaveInsertPoint())
2533     return;
2534   // __kmpc_ordered(ident_t *, gtid);
2535   // OrderedOpGen();
2536   // __kmpc_end_ordered(ident_t *, gtid);
2537   // Prepare arguments and build a call to __kmpc_ordered
2538   if (IsThreads) {
2539     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2540     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2541                               CGM.getModule(), OMPRTL___kmpc_ordered),
2542                           Args,
2543                           OMPBuilder.getOrCreateRuntimeFunction(
2544                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2545                           Args);
2546     OrderedOpGen.setAction(Action);
2547     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2548     return;
2549   }
2550   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2551 }
2552 
2553 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2554   unsigned Flags;
2555   if (Kind == OMPD_for)
2556     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2557   else if (Kind == OMPD_sections)
2558     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2559   else if (Kind == OMPD_single)
2560     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2561   else if (Kind == OMPD_barrier)
2562     Flags = OMP_IDENT_BARRIER_EXPL;
2563   else
2564     Flags = OMP_IDENT_BARRIER_IMPL;
2565   return Flags;
2566 }
2567 
2568 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2569     CodeGenFunction &CGF, const OMPLoopDirective &S,
2570     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2571   // Check if the loop directive is actually a doacross loop directive. In this
2572   // case choose static, 1 schedule.
2573   if (llvm::any_of(
2574           S.getClausesOfKind<OMPOrderedClause>(),
2575           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2576     ScheduleKind = OMPC_SCHEDULE_static;
2577     // Chunk size is 1 in this case.
2578     llvm::APInt ChunkSize(32, 1);
2579     ChunkExpr = IntegerLiteral::Create(
2580         CGF.getContext(), ChunkSize,
2581         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2582         SourceLocation());
2583   }
2584 }
2585 
2586 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2587                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2588                                       bool ForceSimpleCall) {
2589   // Check if we should use the OMPBuilder
2590   auto *OMPRegionInfo =
2591       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2592   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2593     CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2594         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2595     return;
2596   }
2597 
2598   if (!CGF.HaveInsertPoint())
2599     return;
2600   // Build call __kmpc_cancel_barrier(loc, thread_id);
2601   // Build call __kmpc_barrier(loc, thread_id);
2602   unsigned Flags = getDefaultFlagsForBarriers(Kind);
2603   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2604   // thread_id);
2605   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2606                          getThreadID(CGF, Loc)};
2607   if (OMPRegionInfo) {
2608     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2609       llvm::Value *Result = CGF.EmitRuntimeCall(
2610           OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2611                                                 OMPRTL___kmpc_cancel_barrier),
2612           Args);
2613       if (EmitChecks) {
2614         // if (__kmpc_cancel_barrier()) {
2615         //   exit from construct;
2616         // }
2617         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2618         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2619         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2620         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2621         CGF.EmitBlock(ExitBB);
2622         //   exit from construct;
2623         CodeGenFunction::JumpDest CancelDestination =
2624             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2625         CGF.EmitBranchThroughCleanup(CancelDestination);
2626         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2627       }
2628       return;
2629     }
2630   }
2631   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2632                           CGM.getModule(), OMPRTL___kmpc_barrier),
2633                       Args);
2634 }
2635 
2636 /// Map the OpenMP loop schedule to the runtime enumeration.
2637 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2638                                           bool Chunked, bool Ordered) {
2639   switch (ScheduleKind) {
2640   case OMPC_SCHEDULE_static:
2641     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2642                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2643   case OMPC_SCHEDULE_dynamic:
2644     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2645   case OMPC_SCHEDULE_guided:
2646     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2647   case OMPC_SCHEDULE_runtime:
2648     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2649   case OMPC_SCHEDULE_auto:
2650     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2651   case OMPC_SCHEDULE_unknown:
2652     assert(!Chunked && "chunk was specified but schedule kind not known");
2653     return Ordered ? OMP_ord_static : OMP_sch_static;
2654   }
2655   llvm_unreachable("Unexpected runtime schedule");
2656 }
2657 
2658 /// Map the OpenMP distribute schedule to the runtime enumeration.
2659 static OpenMPSchedType
2660 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2661   // only static is allowed for dist_schedule
2662   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2663 }
2664 
2665 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2666                                          bool Chunked) const {
2667   OpenMPSchedType Schedule =
2668       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2669   return Schedule == OMP_sch_static;
2670 }
2671 
2672 bool CGOpenMPRuntime::isStaticNonchunked(
2673     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2674   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2675   return Schedule == OMP_dist_sch_static;
2676 }
2677 
2678 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2679                                       bool Chunked) const {
2680   OpenMPSchedType Schedule =
2681       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2682   return Schedule == OMP_sch_static_chunked;
2683 }
2684 
2685 bool CGOpenMPRuntime::isStaticChunked(
2686     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2687   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2688   return Schedule == OMP_dist_sch_static_chunked;
2689 }
2690 
2691 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2692   OpenMPSchedType Schedule =
2693       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2694   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2695   return Schedule != OMP_sch_static;
2696 }
2697 
2698 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2699                                   OpenMPScheduleClauseModifier M1,
2700                                   OpenMPScheduleClauseModifier M2) {
2701   int Modifier = 0;
2702   switch (M1) {
2703   case OMPC_SCHEDULE_MODIFIER_monotonic:
2704     Modifier = OMP_sch_modifier_monotonic;
2705     break;
2706   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2707     Modifier = OMP_sch_modifier_nonmonotonic;
2708     break;
2709   case OMPC_SCHEDULE_MODIFIER_simd:
2710     if (Schedule == OMP_sch_static_chunked)
2711       Schedule = OMP_sch_static_balanced_chunked;
2712     break;
2713   case OMPC_SCHEDULE_MODIFIER_last:
2714   case OMPC_SCHEDULE_MODIFIER_unknown:
2715     break;
2716   }
2717   switch (M2) {
2718   case OMPC_SCHEDULE_MODIFIER_monotonic:
2719     Modifier = OMP_sch_modifier_monotonic;
2720     break;
2721   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2722     Modifier = OMP_sch_modifier_nonmonotonic;
2723     break;
2724   case OMPC_SCHEDULE_MODIFIER_simd:
2725     if (Schedule == OMP_sch_static_chunked)
2726       Schedule = OMP_sch_static_balanced_chunked;
2727     break;
2728   case OMPC_SCHEDULE_MODIFIER_last:
2729   case OMPC_SCHEDULE_MODIFIER_unknown:
2730     break;
2731   }
2732   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2733   // If the static schedule kind is specified or if the ordered clause is
2734   // specified, and if the nonmonotonic modifier is not specified, the effect is
2735   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2736   // modifier is specified, the effect is as if the nonmonotonic modifier is
2737   // specified.
2738   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2739     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2740           Schedule == OMP_sch_static_balanced_chunked ||
2741           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2742           Schedule == OMP_dist_sch_static_chunked ||
2743           Schedule == OMP_dist_sch_static))
2744       Modifier = OMP_sch_modifier_nonmonotonic;
2745   }
2746   return Schedule | Modifier;
2747 }
2748 
2749 void CGOpenMPRuntime::emitForDispatchInit(
2750     CodeGenFunction &CGF, SourceLocation Loc,
2751     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2752     bool Ordered, const DispatchRTInput &DispatchValues) {
2753   if (!CGF.HaveInsertPoint())
2754     return;
2755   OpenMPSchedType Schedule = getRuntimeSchedule(
2756       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2757   assert(Ordered ||
2758          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2759           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2760           Schedule != OMP_sch_static_balanced_chunked));
2761   // Call __kmpc_dispatch_init(
2762   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2763   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2764   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2765 
2766   // If the Chunk was not specified in the clause - use default value 1.
2767   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2768                                             : CGF.Builder.getIntN(IVSize, 1);
2769   llvm::Value *Args[] = {
2770       emitUpdateLocation(CGF, Loc),
2771       getThreadID(CGF, Loc),
2772       CGF.Builder.getInt32(addMonoNonMonoModifier(
2773           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2774       DispatchValues.LB,                                     // Lower
2775       DispatchValues.UB,                                     // Upper
2776       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2777       Chunk                                                  // Chunk
2778   };
2779   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2780 }
2781 
2782 static void emitForStaticInitCall(
2783     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2784     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2785     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2786     const CGOpenMPRuntime::StaticRTInput &Values) {
2787   if (!CGF.HaveInsertPoint())
2788     return;
2789 
2790   assert(!Values.Ordered);
2791   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2792          Schedule == OMP_sch_static_balanced_chunked ||
2793          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2794          Schedule == OMP_dist_sch_static ||
2795          Schedule == OMP_dist_sch_static_chunked);
2796 
2797   // Call __kmpc_for_static_init(
2798   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2799   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2800   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2801   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2802   llvm::Value *Chunk = Values.Chunk;
2803   if (Chunk == nullptr) {
2804     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2805             Schedule == OMP_dist_sch_static) &&
2806            "expected static non-chunked schedule");
2807     // If the Chunk was not specified in the clause - use default value 1.
2808     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2809   } else {
2810     assert((Schedule == OMP_sch_static_chunked ||
2811             Schedule == OMP_sch_static_balanced_chunked ||
2812             Schedule == OMP_ord_static_chunked ||
2813             Schedule == OMP_dist_sch_static_chunked) &&
2814            "expected static chunked schedule");
2815   }
2816   llvm::Value *Args[] = {
2817       UpdateLocation,
2818       ThreadId,
2819       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2820                                                   M2)), // Schedule type
2821       Values.IL.getPointer(),                           // &isLastIter
2822       Values.LB.getPointer(),                           // &LB
2823       Values.UB.getPointer(),                           // &UB
2824       Values.ST.getPointer(),                           // &Stride
2825       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2826       Chunk                                             // Chunk
2827   };
2828   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2829 }
2830 
2831 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2832                                         SourceLocation Loc,
2833                                         OpenMPDirectiveKind DKind,
2834                                         const OpenMPScheduleTy &ScheduleKind,
2835                                         const StaticRTInput &Values) {
2836   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2837       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2838   assert(isOpenMPWorksharingDirective(DKind) &&
2839          "Expected loop-based or sections-based directive.");
2840   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2841                                              isOpenMPLoopDirective(DKind)
2842                                                  ? OMP_IDENT_WORK_LOOP
2843                                                  : OMP_IDENT_WORK_SECTIONS);
2844   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2845   llvm::FunctionCallee StaticInitFunction =
2846       createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
2847   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2848   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2849                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2850 }
2851 
2852 void CGOpenMPRuntime::emitDistributeStaticInit(
2853     CodeGenFunction &CGF, SourceLocation Loc,
2854     OpenMPDistScheduleClauseKind SchedKind,
2855     const CGOpenMPRuntime::StaticRTInput &Values) {
2856   OpenMPSchedType ScheduleNum =
2857       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2858   llvm::Value *UpdatedLocation =
2859       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2860   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2861   llvm::FunctionCallee StaticInitFunction;
2862   bool isGPUDistribute =
2863       CGM.getLangOpts().OpenMPIsDevice &&
2864       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2865   StaticInitFunction = createForStaticInitFunction(
2866       Values.IVSize, Values.IVSigned, isGPUDistribute);
2867 
2868   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2869                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2870                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2871 }
2872 
2873 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2874                                           SourceLocation Loc,
2875                                           OpenMPDirectiveKind DKind) {
2876   if (!CGF.HaveInsertPoint())
2877     return;
2878   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2879   llvm::Value *Args[] = {
2880       emitUpdateLocation(CGF, Loc,
2881                          isOpenMPDistributeDirective(DKind)
2882                              ? OMP_IDENT_WORK_DISTRIBUTE
2883                              : isOpenMPLoopDirective(DKind)
2884                                    ? OMP_IDENT_WORK_LOOP
2885                                    : OMP_IDENT_WORK_SECTIONS),
2886       getThreadID(CGF, Loc)};
2887   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2888   if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2889       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2890     CGF.EmitRuntimeCall(
2891         OMPBuilder.getOrCreateRuntimeFunction(
2892             CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2893         Args);
2894   else
2895     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2896                             CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2897                         Args);
2898 }
2899 
2900 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2901                                                  SourceLocation Loc,
2902                                                  unsigned IVSize,
2903                                                  bool IVSigned) {
2904   if (!CGF.HaveInsertPoint())
2905     return;
2906   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2907   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2908   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2909 }
2910 
2911 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2912                                           SourceLocation Loc, unsigned IVSize,
2913                                           bool IVSigned, Address IL,
2914                                           Address LB, Address UB,
2915                                           Address ST) {
2916   // Call __kmpc_dispatch_next(
2917   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2918   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2919   //          kmp_int[32|64] *p_stride);
2920   llvm::Value *Args[] = {
2921       emitUpdateLocation(CGF, Loc),
2922       getThreadID(CGF, Loc),
2923       IL.getPointer(), // &isLastIter
2924       LB.getPointer(), // &Lower
2925       UB.getPointer(), // &Upper
2926       ST.getPointer()  // &Stride
2927   };
2928   llvm::Value *Call =
2929       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2930   return CGF.EmitScalarConversion(
2931       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2932       CGF.getContext().BoolTy, Loc);
2933 }
2934 
2935 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2936                                            llvm::Value *NumThreads,
2937                                            SourceLocation Loc) {
2938   if (!CGF.HaveInsertPoint())
2939     return;
2940   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2941   llvm::Value *Args[] = {
2942       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2943       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2944   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2945                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2946                       Args);
2947 }
2948 
2949 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2950                                          ProcBindKind ProcBind,
2951                                          SourceLocation Loc) {
2952   if (!CGF.HaveInsertPoint())
2953     return;
2954   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2955   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2956   llvm::Value *Args[] = {
2957       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2958       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2959   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2960                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2961                       Args);
2962 }
2963 
2964 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2965                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2966   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2967     OMPBuilder.createFlush(CGF.Builder);
2968   } else {
2969     if (!CGF.HaveInsertPoint())
2970       return;
2971     // Build call void __kmpc_flush(ident_t *loc)
2972     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2973                             CGM.getModule(), OMPRTL___kmpc_flush),
2974                         emitUpdateLocation(CGF, Loc));
2975   }
2976 }
2977 
namespace {
/// Indexes of fields for type kmp_task_t.
///
/// The enumerators carry no explicit values, so they are numbered
/// consecutively from 0 in declaration order; reordering them changes every
/// index that follows.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  /// NOTE(review): the generic name suggests a multi-purpose slot -- confirm
  /// against the kmp_task_t record built elsewhere in this file.
  Data1,
  /// Task priority.
  /// NOTE(review): same caveat as Data1 regarding the generic name.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
3003 
3004 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3005   return OffloadEntriesTargetRegion.empty() &&
3006          OffloadEntriesDeviceGlobalVar.empty();
3007 }
3008 
3009 /// Initialize target region entry.
3010 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3011     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3012                                     StringRef ParentName, unsigned LineNum,
3013                                     unsigned Order) {
3014   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3015                                              "only required for the device "
3016                                              "code generation.");
3017   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3018       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3019                                    OMPTargetRegionEntryTargetRegion);
3020   ++OffloadingEntriesNum;
3021 }
3022 
3023 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3024     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3025                                   StringRef ParentName, unsigned LineNum,
3026                                   llvm::Constant *Addr, llvm::Constant *ID,
3027                                   OMPTargetRegionEntryKind Flags) {
3028   // If we are emitting code for a target, the entry is already initialized,
3029   // only has to be registered.
3030   if (CGM.getLangOpts().OpenMPIsDevice) {
3031     // This could happen if the device compilation is invoked standalone.
3032     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
3033       return;
3034     auto &Entry =
3035         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3036     Entry.setAddress(Addr);
3037     Entry.setID(ID);
3038     Entry.setFlags(Flags);
3039   } else {
3040     if (Flags ==
3041             OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
3042         hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
3043                                  /*IgnoreAddressId*/ true))
3044       return;
3045     assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
3046            "Target region entry already registered!");
3047     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3048     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3049     ++OffloadingEntriesNum;
3050   }
3051 }
3052 
3053 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3054     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3055     bool IgnoreAddressId) const {
3056   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3057   if (PerDevice == OffloadEntriesTargetRegion.end())
3058     return false;
3059   auto PerFile = PerDevice->second.find(FileID);
3060   if (PerFile == PerDevice->second.end())
3061     return false;
3062   auto PerParentName = PerFile->second.find(ParentName);
3063   if (PerParentName == PerFile->second.end())
3064     return false;
3065   auto PerLine = PerParentName->second.find(LineNum);
3066   if (PerLine == PerParentName->second.end())
3067     return false;
3068   // Fail if this entry is already registered.
3069   if (!IgnoreAddressId &&
3070       (PerLine->second.getAddress() || PerLine->second.getID()))
3071     return false;
3072   return true;
3073 }
3074 
3075 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3076     const OffloadTargetRegionEntryInfoActTy &Action) {
3077   // Scan all target region entries and perform the provided action.
3078   for (const auto &D : OffloadEntriesTargetRegion)
3079     for (const auto &F : D.second)
3080       for (const auto &P : F.second)
3081         for (const auto &L : P.second)
3082           Action(D.first, F.first, P.first(), L.first, L.second);
3083 }
3084 
3085 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3086     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3087                                        OMPTargetGlobalVarEntryKind Flags,
3088                                        unsigned Order) {
3089   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3090                                              "only required for the device "
3091                                              "code generation.");
3092   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3093   ++OffloadingEntriesNum;
3094 }
3095 
3096 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3097     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3098                                      CharUnits VarSize,
3099                                      OMPTargetGlobalVarEntryKind Flags,
3100                                      llvm::GlobalValue::LinkageTypes Linkage) {
3101   if (CGM.getLangOpts().OpenMPIsDevice) {
3102     // This could happen if the device compilation is invoked standalone.
3103     if (!hasDeviceGlobalVarEntryInfo(VarName))
3104       return;
3105     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3106     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3107       if (Entry.getVarSize().isZero()) {
3108         Entry.setVarSize(VarSize);
3109         Entry.setLinkage(Linkage);
3110       }
3111       return;
3112     }
3113     Entry.setVarSize(VarSize);
3114     Entry.setLinkage(Linkage);
3115     Entry.setAddress(Addr);
3116   } else {
3117     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3118       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3119       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3120              "Entry not initialized!");
3121       if (Entry.getVarSize().isZero()) {
3122         Entry.setVarSize(VarSize);
3123         Entry.setLinkage(Linkage);
3124       }
3125       return;
3126     }
3127     OffloadEntriesDeviceGlobalVar.try_emplace(
3128         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3129     ++OffloadingEntriesNum;
3130   }
3131 }
3132 
3133 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3134     actOnDeviceGlobalVarEntriesInfo(
3135         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3136   // Scan all target region entries and perform the provided action.
3137   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3138     Action(E.getKey(), E.getValue());
3139 }
3140 
3141 void CGOpenMPRuntime::createOffloadEntry(
3142     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3143     llvm::GlobalValue::LinkageTypes Linkage) {
3144   StringRef Name = Addr->getName();
3145   llvm::Module &M = CGM.getModule();
3146   llvm::LLVMContext &C = M.getContext();
3147 
3148   // Create constant string with the name.
3149   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3150 
3151   std::string StringName = getName({"omp_offloading", "entry_name"});
3152   auto *Str = new llvm::GlobalVariable(
3153       M, StrPtrInit->getType(), /*isConstant=*/true,
3154       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3155   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3156 
3157   llvm::Constant *Data[] = {
3158       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3159       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3160       llvm::ConstantInt::get(CGM.SizeTy, Size),
3161       llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3162       llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3163   std::string EntryName = getName({"omp_offloading", "entry", ""});
3164   llvm::GlobalVariable *Entry = createGlobalStruct(
3165       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3166       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3167 
3168   // The entry has to be created in the section the linker expects it to be.
3169   Entry->setSection("omp_offloading_entries");
3170 }
3171 
3172 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3173   // Emit the offloading entries and metadata so that the device codegen side
3174   // can easily figure out what to emit. The produced metadata looks like
3175   // this:
3176   //
3177   // !omp_offload.info = !{!1, ...}
3178   //
3179   // Right now we only generate metadata for function that contain target
3180   // regions.
3181 
3182   // If we are in simd mode or there are no entries, we don't need to do
3183   // anything.
3184   if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
3185     return;
3186 
3187   llvm::Module &M = CGM.getModule();
3188   llvm::LLVMContext &C = M.getContext();
3189   SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
3190                          SourceLocation, StringRef>,
3191               16>
3192       OrderedEntries(OffloadEntriesInfoManager.size());
3193   llvm::SmallVector<StringRef, 16> ParentFunctions(
3194       OffloadEntriesInfoManager.size());
3195 
3196   // Auxiliary methods to create metadata values and strings.
3197   auto &&GetMDInt = [this](unsigned V) {
3198     return llvm::ConstantAsMetadata::get(
3199         llvm::ConstantInt::get(CGM.Int32Ty, V));
3200   };
3201 
3202   auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3203 
3204   // Create the offloading info metadata node.
3205   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3206 
3207   // Create function that emits metadata for each target region entry;
3208   auto &&TargetRegionMetadataEmitter =
3209       [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
3210        &GetMDString](
3211           unsigned DeviceID, unsigned FileID, StringRef ParentName,
3212           unsigned Line,
3213           const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
3214         // Generate metadata for target regions. Each entry of this metadata
3215         // contains:
3216         // - Entry 0 -> Kind of this type of metadata (0).
3217         // - Entry 1 -> Device ID of the file where the entry was identified.
3218         // - Entry 2 -> File ID of the file where the entry was identified.
3219         // - Entry 3 -> Mangled name of the function where the entry was
3220         // identified.
3221         // - Entry 4 -> Line in the file where the entry was identified.
3222         // - Entry 5 -> Order the entry was created.
3223         // The first element of the metadata node is the kind.
3224         llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
3225                                  GetMDInt(FileID),      GetMDString(ParentName),
3226                                  GetMDInt(Line),        GetMDInt(E.getOrder())};
3227 
3228         SourceLocation Loc;
3229         for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
3230                   E = CGM.getContext().getSourceManager().fileinfo_end();
3231              I != E; ++I) {
3232           if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
3233               I->getFirst()->getUniqueID().getFile() == FileID) {
3234             Loc = CGM.getContext().getSourceManager().translateFileLineCol(
3235                 I->getFirst(), Line, 1);
3236             break;
3237           }
3238         }
3239         // Save this entry in the right position of the ordered entries array.
3240         OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
3241         ParentFunctions[E.getOrder()] = ParentName;
3242 
3243         // Add metadata to the named metadata node.
3244         MD->addOperand(llvm::MDNode::get(C, Ops));
3245       };
3246 
3247   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3248       TargetRegionMetadataEmitter);
3249 
3250   // Create function that emits metadata for each device global variable entry;
3251   auto &&DeviceGlobalVarMetadataEmitter =
3252       [&C, &OrderedEntries, &GetMDInt, &GetMDString,
3253        MD](StringRef MangledName,
3254            const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
3255                &E) {
3256         // Generate metadata for global variables. Each entry of this metadata
3257         // contains:
3258         // - Entry 0 -> Kind of this type of metadata (1).
3259         // - Entry 1 -> Mangled name of the variable.
3260         // - Entry 2 -> Declare target kind.
3261         // - Entry 3 -> Order the entry was created.
3262         // The first element of the metadata node is the kind.
3263         llvm::Metadata *Ops[] = {
3264             GetMDInt(E.getKind()), GetMDString(MangledName),
3265             GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3266 
3267         // Save this entry in the right position of the ordered entries array.
3268         OrderedEntries[E.getOrder()] =
3269             std::make_tuple(&E, SourceLocation(), MangledName);
3270 
3271         // Add metadata to the named metadata node.
3272         MD->addOperand(llvm::MDNode::get(C, Ops));
3273       };
3274 
3275   OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
3276       DeviceGlobalVarMetadataEmitter);
3277 
3278   for (const auto &E : OrderedEntries) {
3279     assert(std::get<0>(E) && "All ordered entries must exist!");
3280     if (const auto *CE =
3281             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3282                 std::get<0>(E))) {
3283       if (!CE->getID() || !CE->getAddress()) {
3284         // Do not blame the entry if the parent funtion is not emitted.
3285         StringRef FnName = ParentFunctions[CE->getOrder()];
3286         if (!CGM.GetGlobalValue(FnName))
3287           continue;
3288         unsigned DiagID = CGM.getDiags().getCustomDiagID(
3289             DiagnosticsEngine::Error,
3290             "Offloading entry for target region in %0 is incorrect: either the "
3291             "address or the ID is invalid.");
3292         CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
3293         continue;
3294       }
3295       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3296                          CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3297     } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
3298                                              OffloadEntryInfoDeviceGlobalVar>(
3299                    std::get<0>(E))) {
3300       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
3301           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3302               CE->getFlags());
3303       switch (Flags) {
3304       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
3305         if (CGM.getLangOpts().OpenMPIsDevice &&
3306             CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
3307           continue;
3308         if (!CE->getAddress()) {
3309           unsigned DiagID = CGM.getDiags().getCustomDiagID(
3310               DiagnosticsEngine::Error, "Offloading entry for declare target "
3311                                         "variable %0 is incorrect: the "
3312                                         "address is invalid.");
3313           CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
3314           continue;
3315         }
3316         // The vaiable has no definition - no need to add the entry.
3317         if (CE->getVarSize().isZero())
3318           continue;
3319         break;
3320       }
3321       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
3322         assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
3323                 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
3324                "Declaret target link address is set.");
3325         if (CGM.getLangOpts().OpenMPIsDevice)
3326           continue;
3327         if (!CE->getAddress()) {
3328           unsigned DiagID = CGM.getDiags().getCustomDiagID(
3329               DiagnosticsEngine::Error,
3330               "Offloading entry for declare target variable is incorrect: the "
3331               "address is invalid.");
3332           CGM.getDiags().Report(DiagID);
3333           continue;
3334         }
3335         break;
3336       }
3337       createOffloadEntry(CE->getAddress(), CE->getAddress(),
3338                          CE->getVarSize().getQuantity(), Flags,
3339                          CE->getLinkage());
3340     } else {
3341       llvm_unreachable("Unsupported entry kind.");
3342     }
3343   }
3344 }
3345 
3346 /// Loads all the offload entries information from the host IR
3347 /// metadata.
3348 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3349   // If we are in target mode, load the metadata from the host IR. This code has
3350   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
3351 
3352   if (!CGM.getLangOpts().OpenMPIsDevice)
3353     return;
3354 
3355   if (CGM.getLangOpts().OMPHostIRFile.empty())
3356     return;
3357 
3358   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3359   if (auto EC = Buf.getError()) {
3360     CGM.getDiags().Report(diag::err_cannot_open_file)
3361         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3362     return;
3363   }
3364 
3365   llvm::LLVMContext C;
3366   auto ME = expectedToErrorOrAndEmitErrors(
3367       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3368 
3369   if (auto EC = ME.getError()) {
3370     unsigned DiagID = CGM.getDiags().getCustomDiagID(
3371         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3372     CGM.getDiags().Report(DiagID)
3373         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3374     return;
3375   }
3376 
3377   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3378   if (!MD)
3379     return;
3380 
3381   for (llvm::MDNode *MN : MD->operands()) {
3382     auto &&GetMDInt = [MN](unsigned Idx) {
3383       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3384       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3385     };
3386 
3387     auto &&GetMDString = [MN](unsigned Idx) {
3388       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3389       return V->getString();
3390     };
3391 
3392     switch (GetMDInt(0)) {
3393     default:
3394       llvm_unreachable("Unexpected metadata!");
3395       break;
3396     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3397         OffloadingEntryInfoTargetRegion:
3398       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3399           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3400           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3401           /*Order=*/GetMDInt(5));
3402       break;
3403     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3404         OffloadingEntryInfoDeviceGlobalVar:
3405       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3406           /*MangledName=*/GetMDString(1),
3407           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3408               /*Flags=*/GetMDInt(2)),
3409           /*Order=*/GetMDInt(3));
3410       break;
3411     }
3412   }
3413 }
3414 
3415 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3416   if (!KmpRoutineEntryPtrTy) {
3417     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3418     ASTContext &C = CGM.getContext();
3419     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3420     FunctionProtoType::ExtProtoInfo EPI;
3421     KmpRoutineEntryPtrQTy = C.getPointerType(
3422         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3423     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3424   }
3425 }
3426 
3427 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3428   // Make sure the type of the entry is already created. This is the type we
3429   // have to create:
3430   // struct __tgt_offload_entry{
3431   //   void      *addr;       // Pointer to the offload entry info.
3432   //                          // (function or global)
3433   //   char      *name;       // Name of the function or global.
3434   //   size_t     size;       // Size of the entry info (0 if it a function).
3435   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3436   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3437   // };
3438   if (TgtOffloadEntryQTy.isNull()) {
3439     ASTContext &C = CGM.getContext();
3440     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3441     RD->startDefinition();
3442     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3443     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3444     addFieldToRecordDecl(C, RD, C.getSizeType());
3445     addFieldToRecordDecl(
3446         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3447     addFieldToRecordDecl(
3448         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3449     RD->completeDefinition();
3450     RD->addAttr(PackedAttr::CreateImplicit(C));
3451     TgtOffloadEntryQTy = C.getRecordType(RD);
3452   }
3453   return TgtOffloadEntryQTy;
3454 }
3455 
3456 namespace {
3457 struct PrivateHelpersTy {
3458   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3459                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3460       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3461         PrivateElemInit(PrivateElemInit) {}
3462   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3463   const Expr *OriginalRef = nullptr;
3464   const VarDecl *Original = nullptr;
3465   const VarDecl *PrivateCopy = nullptr;
3466   const VarDecl *PrivateElemInit = nullptr;
3467   bool isLocalPrivate() const {
3468     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3469   }
3470 };
3471 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3472 } // anonymous namespace
3473 
3474 static bool isAllocatableDecl(const VarDecl *VD) {
3475   const VarDecl *CVD = VD->getCanonicalDecl();
3476   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3477     return false;
3478   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3479   // Use the default allocation.
3480   return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
3481            !AA->getAllocator());
3482 }
3483 
3484 static RecordDecl *
3485 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3486   if (!Privates.empty()) {
3487     ASTContext &C = CGM.getContext();
3488     // Build struct .kmp_privates_t. {
3489     //         /*  private vars  */
3490     //       };
3491     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3492     RD->startDefinition();
3493     for (const auto &Pair : Privates) {
3494       const VarDecl *VD = Pair.second.Original;
3495       QualType Type = VD->getType().getNonReferenceType();
3496       // If the private variable is a local variable with lvalue ref type,
3497       // allocate the pointer instead of the pointee type.
3498       if (Pair.second.isLocalPrivate()) {
3499         if (VD->getType()->isLValueReferenceType())
3500           Type = C.getPointerType(Type);
3501         if (isAllocatableDecl(VD))
3502           Type = C.getPointerType(Type);
3503       }
3504       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3505       if (VD->hasAttrs()) {
3506         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3507              E(VD->getAttrs().end());
3508              I != E; ++I)
3509           FD->addAttr(*I);
3510       }
3511     }
3512     RD->completeDefinition();
3513     return RD;
3514   }
3515   return nullptr;
3516 }
3517 
3518 static RecordDecl *
3519 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3520                          QualType KmpInt32Ty,
3521                          QualType KmpRoutineEntryPointerQTy) {
3522   ASTContext &C = CGM.getContext();
3523   // Build struct kmp_task_t {
3524   //         void *              shareds;
3525   //         kmp_routine_entry_t routine;
3526   //         kmp_int32           part_id;
3527   //         kmp_cmplrdata_t data1;
3528   //         kmp_cmplrdata_t data2;
3529   // For taskloops additional fields:
3530   //         kmp_uint64          lb;
3531   //         kmp_uint64          ub;
3532   //         kmp_int64           st;
3533   //         kmp_int32           liter;
3534   //         void *              reductions;
3535   //       };
3536   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3537   UD->startDefinition();
3538   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3539   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3540   UD->completeDefinition();
3541   QualType KmpCmplrdataTy = C.getRecordType(UD);
3542   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3543   RD->startDefinition();
3544   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3545   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3546   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3547   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3548   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3549   if (isOpenMPTaskLoopDirective(Kind)) {
3550     QualType KmpUInt64Ty =
3551         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3552     QualType KmpInt64Ty =
3553         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3554     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3555     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3556     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3557     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3558     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3559   }
3560   RD->completeDefinition();
3561   return RD;
3562 }
3563 
3564 static RecordDecl *
3565 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3566                                      ArrayRef<PrivateDataTy> Privates) {
3567   ASTContext &C = CGM.getContext();
3568   // Build struct kmp_task_t_with_privates {
3569   //         kmp_task_t task_data;
3570   //         .kmp_privates_t. privates;
3571   //       };
3572   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3573   RD->startDefinition();
3574   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3575   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3576     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3577   RD->completeDefinition();
3578   return RD;
3579 }
3580 
3581 /// Emit a proxy function which accepts kmp_task_t as the second
3582 /// argument.
3583 /// \code
3584 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3585 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3586 ///   For taskloops:
3587 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3588 ///   tt->reductions, tt->shareds);
3589 ///   return 0;
3590 /// }
3591 /// \endcode
3592 static llvm::Function *
3593 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3594                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3595                       QualType KmpTaskTWithPrivatesPtrQTy,
3596                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3597                       QualType SharedsPtrTy, llvm::Function *TaskFunction,
3598                       llvm::Value *TaskPrivatesMap) {
3599   ASTContext &C = CGM.getContext();
3600   FunctionArgList Args;
3601   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3602                             ImplicitParamDecl::Other);
3603   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3604                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3605                                 ImplicitParamDecl::Other);
3606   Args.push_back(&GtidArg);
3607   Args.push_back(&TaskTypeArg);
3608   const auto &TaskEntryFnInfo =
3609       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3610   llvm::FunctionType *TaskEntryTy =
3611       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3612   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3613   auto *TaskEntry = llvm::Function::Create(
3614       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3615   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3616   TaskEntry->setDoesNotRecurse();
3617   CodeGenFunction CGF(CGM);
3618   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3619                     Loc, Loc);
3620 
3621   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3622   // tt,
3623   // For taskloops:
3624   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3625   // tt->task_data.shareds);
3626   llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3627       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3628   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3629       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3630       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3631   const auto *KmpTaskTWithPrivatesQTyRD =
3632       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3633   LValue Base =
3634       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3635   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3636   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3637   LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3638   llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3639 
3640   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3641   LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3642   llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3643       CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3644       CGF.ConvertTypeForMem(SharedsPtrTy));
3645 
3646   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3647   llvm::Value *PrivatesParam;
3648   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3649     LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3650     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3651         PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3652   } else {
3653     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3654   }
3655 
3656   llvm::Value *CommonArgs[] = {
3657       GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3658       CGF.Builder
3659           .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
3660                                                CGF.VoidPtrTy, CGF.Int8Ty)
3661           .getPointer()};
3662   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3663                                           std::end(CommonArgs));
3664   if (isOpenMPTaskLoopDirective(Kind)) {
3665     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3666     LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3667     llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3668     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3669     LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3670     llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3671     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3672     LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3673     llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3674     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3675     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3676     llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3677     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3678     LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3679     llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3680     CallArgs.push_back(LBParam);
3681     CallArgs.push_back(UBParam);
3682     CallArgs.push_back(StParam);
3683     CallArgs.push_back(LIParam);
3684     CallArgs.push_back(RParam);
3685   }
3686   CallArgs.push_back(SharedsParam);
3687 
3688   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3689                                                   CallArgs);
3690   CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3691                              CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3692   CGF.FinishFunction();
3693   return TaskEntry;
3694 }
3695 
3696 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3697                                             SourceLocation Loc,
3698                                             QualType KmpInt32Ty,
3699                                             QualType KmpTaskTWithPrivatesPtrQTy,
3700                                             QualType KmpTaskTWithPrivatesQTy) {
3701   ASTContext &C = CGM.getContext();
3702   FunctionArgList Args;
3703   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3704                             ImplicitParamDecl::Other);
3705   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3706                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3707                                 ImplicitParamDecl::Other);
3708   Args.push_back(&GtidArg);
3709   Args.push_back(&TaskTypeArg);
3710   const auto &DestructorFnInfo =
3711       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3712   llvm::FunctionType *DestructorFnTy =
3713       CGM.getTypes().GetFunctionType(DestructorFnInfo);
3714   std::string Name =
3715       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3716   auto *DestructorFn =
3717       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3718                              Name, &CGM.getModule());
3719   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3720                                     DestructorFnInfo);
3721   DestructorFn->setDoesNotRecurse();
3722   CodeGenFunction CGF(CGM);
3723   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3724                     Args, Loc, Loc);
3725 
3726   LValue Base = CGF.EmitLoadOfPointerLValue(
3727       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3728       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3729   const auto *KmpTaskTWithPrivatesQTyRD =
3730       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3731   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3732   Base = CGF.EmitLValueForField(Base, *FI);
3733   for (const auto *Field :
3734        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3735     if (QualType::DestructionKind DtorKind =
3736             Field->getType().isDestructedType()) {
3737       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3738       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3739     }
3740   }
3741   CGF.FinishFunction();
3742   return DestructorFn;
3743 }
3744 
3745 /// Emit a privates mapping function for correct handling of private and
3746 /// firstprivate variables.
3747 /// \code
3748 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3749 /// **noalias priv1,...,  <tyn> **noalias privn) {
3750 ///   *priv1 = &.privates.priv1;
3751 ///   ...;
3752 ///   *privn = &.privates.privn;
3753 /// }
3754 /// \endcode
3755 static llvm::Value *
3756 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3757                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3758                                ArrayRef<PrivateDataTy> Privates) {
3759   ASTContext &C = CGM.getContext();
3760   FunctionArgList Args;
3761   ImplicitParamDecl TaskPrivatesArg(
3762       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3763       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3764       ImplicitParamDecl::Other);
3765   Args.push_back(&TaskPrivatesArg);
3766   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3767   unsigned Counter = 1;
3768   for (const Expr *E : Data.PrivateVars) {
3769     Args.push_back(ImplicitParamDecl::Create(
3770         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3771         C.getPointerType(C.getPointerType(E->getType()))
3772             .withConst()
3773             .withRestrict(),
3774         ImplicitParamDecl::Other));
3775     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3776     PrivateVarsPos[VD] = Counter;
3777     ++Counter;
3778   }
3779   for (const Expr *E : Data.FirstprivateVars) {
3780     Args.push_back(ImplicitParamDecl::Create(
3781         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3782         C.getPointerType(C.getPointerType(E->getType()))
3783             .withConst()
3784             .withRestrict(),
3785         ImplicitParamDecl::Other));
3786     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3787     PrivateVarsPos[VD] = Counter;
3788     ++Counter;
3789   }
3790   for (const Expr *E : Data.LastprivateVars) {
3791     Args.push_back(ImplicitParamDecl::Create(
3792         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3793         C.getPointerType(C.getPointerType(E->getType()))
3794             .withConst()
3795             .withRestrict(),
3796         ImplicitParamDecl::Other));
3797     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3798     PrivateVarsPos[VD] = Counter;
3799     ++Counter;
3800   }
3801   for (const VarDecl *VD : Data.PrivateLocals) {
3802     QualType Ty = VD->getType().getNonReferenceType();
3803     if (VD->getType()->isLValueReferenceType())
3804       Ty = C.getPointerType(Ty);
3805     if (isAllocatableDecl(VD))
3806       Ty = C.getPointerType(Ty);
3807     Args.push_back(ImplicitParamDecl::Create(
3808         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3809         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3810         ImplicitParamDecl::Other));
3811     PrivateVarsPos[VD] = Counter;
3812     ++Counter;
3813   }
3814   const auto &TaskPrivatesMapFnInfo =
3815       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3816   llvm::FunctionType *TaskPrivatesMapTy =
3817       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3818   std::string Name =
3819       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3820   auto *TaskPrivatesMap = llvm::Function::Create(
3821       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3822       &CGM.getModule());
3823   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3824                                     TaskPrivatesMapFnInfo);
3825   if (CGM.getLangOpts().Optimize) {
3826     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3827     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3828     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3829   }
3830   CodeGenFunction CGF(CGM);
3831   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3832                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3833 
3834   // *privi = &.privates.privi;
3835   LValue Base = CGF.EmitLoadOfPointerLValue(
3836       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3837       TaskPrivatesArg.getType()->castAs<PointerType>());
3838   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3839   Counter = 0;
3840   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3841     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3842     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3843     LValue RefLVal =
3844         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3845     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3846         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3847     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3848     ++Counter;
3849   }
3850   CGF.FinishFunction();
3851   return TaskPrivatesMap;
3852 }
3853 
/// Emit initialization for private variables in task-based directives.
///
/// Walks \p Privates in step with the fields of the privates record (the
/// second field of \p KmpTaskTWithPrivatesQTyRD) and emits the initializer of
/// each non-local private copy into its field.
///
/// \param CGF Function in which the initialization code is emitted.
/// \param D Directive whose 'task' or 'taskloop' captured statement is used
/// to look up captured variables.
/// \param KmpTaskSharedsPtr Address of the shareds block used as the source of
/// firstprivate values; may be invalid, in which case no source base is set
/// up for target tasks.
/// \param TDBase LValue of the kmp_task_t_with_privates object to initialize.
/// \param KmpTaskTWithPrivatesQTyRD Record declaration of that object.
/// \param SharedsTy Type of the shareds block.
/// \param SharedsPtrTy Pointer-to-shareds type.
/// \param Data Task data; only FirstprivateVars is consulted here.
/// \param Privates Descriptions of the private variables, in field order.
/// \param ForDup True when called from the task duplication function; in that
/// case only non-trivial constructor initializers are emitted.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  // Any taskloop-based directive uses the 'taskloop' captured statement, all
  // others use the 'task' one.
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    // View the shareds pointer as an lvalue of the shareds type so that
    // captured fields can be addressed through it.
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  // From here on FI iterates over the fields of the privates record itself;
  // it is advanced once per entry of Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // Outside the dup function every initializer is emitted; inside it only
    // non-trivial constructor calls are.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // A non-null PrivateElemInit means the initializer reads from a source
      // value, so the address of that source has to be computed first.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          // Not captured: use the address of the local variable directly.
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // In the dup function the source is the captured field inside the
          // shareds block, re-aligned to the original declaration's
          // alignment.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress(CGF).withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          // Variable captured by an enclosing lambda, or code emitted inside
          // a block: evaluate the original reference as is.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  // Bind the element-init helper to the current source
                  // element while the initializer is emitted.
                  InitScope.addPrivate(Elem, SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array: bind the element-init helper to the source address and
          // emit the initializer into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // No source value involved: emit the initializer directly.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3973 
3974 /// Check if duplication function is required for taskloops.
3975 static bool checkInitIsRequired(CodeGenFunction &CGF,
3976                                 ArrayRef<PrivateDataTy> Privates) {
3977   bool InitRequired = false;
3978   for (const PrivateDataTy &Pair : Privates) {
3979     if (Pair.second.isLocalPrivate())
3980       continue;
3981     const VarDecl *VD = Pair.second.PrivateCopy;
3982     const Expr *Init = VD->getAnyInitializer();
3983     InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3984                                     !CGF.isTrivialInitializer(Init));
3985     if (InitRequired)
3986       break;
3987   }
3988   return InitRequired;
3989 }
3990 
3991 
3992 /// Emit task_dup function (for initialization of
3993 /// private/firstprivate/lastprivate vars and last_iter flag)
3994 /// \code
3995 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3996 /// lastpriv) {
3997 /// // setup lastprivate flag
3998 ///    task_dst->last = lastpriv;
3999 /// // could be constructor calls here...
4000 /// }
4001 /// \endcode
4002 static llvm::Value *
4003 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
4004                     const OMPExecutableDirective &D,
4005                     QualType KmpTaskTWithPrivatesPtrQTy,
4006                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4007                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
4008                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4009                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4010   ASTContext &C = CGM.getContext();
4011   FunctionArgList Args;
4012   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4013                            KmpTaskTWithPrivatesPtrQTy,
4014                            ImplicitParamDecl::Other);
4015   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4016                            KmpTaskTWithPrivatesPtrQTy,
4017                            ImplicitParamDecl::Other);
4018   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4019                                 ImplicitParamDecl::Other);
4020   Args.push_back(&DstArg);
4021   Args.push_back(&SrcArg);
4022   Args.push_back(&LastprivArg);
4023   const auto &TaskDupFnInfo =
4024       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4025   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4026   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
4027   auto *TaskDup = llvm::Function::Create(
4028       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4029   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4030   TaskDup->setDoesNotRecurse();
4031   CodeGenFunction CGF(CGM);
4032   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4033                     Loc);
4034 
4035   LValue TDBase = CGF.EmitLoadOfPointerLValue(
4036       CGF.GetAddrOfLocalVar(&DstArg),
4037       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4038   // task_dst->liter = lastpriv;
4039   if (WithLastIter) {
4040     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4041     LValue Base = CGF.EmitLValueForField(
4042         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4043     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4044     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4045         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4046     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4047   }
4048 
4049   // Emit initial values for private copies (if any).
4050   assert(!Privates.empty());
4051   Address KmpTaskSharedsPtr = Address::invalid();
4052   if (!Data.FirstprivateVars.empty()) {
4053     LValue TDBase = CGF.EmitLoadOfPointerLValue(
4054         CGF.GetAddrOfLocalVar(&SrcArg),
4055         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4056     LValue Base = CGF.EmitLValueForField(
4057         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4058     KmpTaskSharedsPtr = Address::deprecated(
4059         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4060                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
4061                                                   KmpTaskTShareds)),
4062                              Loc),
4063         CGM.getNaturalTypeAlignment(SharedsTy));
4064   }
4065   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4066                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4067   CGF.FinishFunction();
4068   return TaskDup;
4069 }
4070 
4071 /// Checks if destructor function is required to be generated.
4072 /// \return true if cleanups are required, false otherwise.
4073 static bool
4074 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4075                          ArrayRef<PrivateDataTy> Privates) {
4076   for (const PrivateDataTy &P : Privates) {
4077     if (P.second.isLocalPrivate())
4078       continue;
4079     QualType Ty = P.second.Original->getType().getNonReferenceType();
4080     if (Ty.isDestructedType())
4081       return true;
4082   }
4083   return false;
4084 }
4085 
4086 namespace {
/// Loop generator for OpenMP iterator expression.
///
/// The constructor emits, for every iterator of the expression, a counter
/// initialization, a "iter.cont" condition block and a "iter.body" block, each
/// one nested in the body of the previous iterator. The destructor closes the
/// loops in reverse order (counter increment, branch back to "iter.cont",
/// "iter.exit" block). Code emitted while the object is alive therefore lands
/// in the body of the last iterator. When constructed with a null expression
/// the object emits nothing.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  /// Iterator expression being expanded; may be null.
  const OMPIteratorExpr *E = nullptr;
  /// Per-iterator jump destinations of the loop condition blocks.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  /// Per-iterator jump destinations of the loop exit blocks.
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// Opens the loop nest described by \p E in \p CGF.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // First pass: evaluate every upper bound and register private storage for
    // the iterator variable and its helper counter. All upper bounds are
    // emitted here, before any loop block is opened.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    // Second pass: emit the loop headers, outermost iterator first.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // The comparison signedness follows the type of the counter variable.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Closes the loop nest opened by the constructor, innermost loop first.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      // Only the exit block of the outermost iterator (I == 1) is emitted as
      // finished.
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
4160 } // namespace
4161 
4162 static std::pair<llvm::Value *, llvm::Value *>
4163 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4164   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4165   llvm::Value *Addr;
4166   if (OASE) {
4167     const Expr *Base = OASE->getBase();
4168     Addr = CGF.EmitScalarExpr(Base);
4169   } else {
4170     Addr = CGF.EmitLValue(E).getPointer(CGF);
4171   }
4172   llvm::Value *SizeVal;
4173   QualType Ty = E->getType();
4174   if (OASE) {
4175     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4176     for (const Expr *SE : OASE->getDimensions()) {
4177       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4178       Sz = CGF.EmitScalarConversion(
4179           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4180       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4181     }
4182   } else if (const auto *ASE =
4183                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4184     LValue UpAddrLVal =
4185         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4186     Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
4187     llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
4188         UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
4189     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4190     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4191     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4192   } else {
4193     SizeVal = CGF.getTypeSize(Ty);
4194   }
4195   return std::make_pair(Addr, SizeVal);
4196 }
4197 
4198 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4199 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4200   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4201   if (KmpTaskAffinityInfoTy.isNull()) {
4202     RecordDecl *KmpAffinityInfoRD =
4203         C.buildImplicitRecord("kmp_task_affinity_info_t");
4204     KmpAffinityInfoRD->startDefinition();
4205     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4206     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4207     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4208     KmpAffinityInfoRD->completeDefinition();
4209     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4210   }
4211 }
4212 
4213 CGOpenMPRuntime::TaskResultTy
4214 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4215                               const OMPExecutableDirective &D,
4216                               llvm::Function *TaskFunction, QualType SharedsTy,
4217                               Address Shareds, const OMPTaskDataTy &Data) {
4218   ASTContext &C = CGM.getContext();
4219   llvm::SmallVector<PrivateDataTy, 4> Privates;
4220   // Aggregate privates and sort them by the alignment.
4221   const auto *I = Data.PrivateCopies.begin();
4222   for (const Expr *E : Data.PrivateVars) {
4223     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4224     Privates.emplace_back(
4225         C.getDeclAlign(VD),
4226         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4227                          /*PrivateElemInit=*/nullptr));
4228     ++I;
4229   }
4230   I = Data.FirstprivateCopies.begin();
4231   const auto *IElemInitRef = Data.FirstprivateInits.begin();
4232   for (const Expr *E : Data.FirstprivateVars) {
4233     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4234     Privates.emplace_back(
4235         C.getDeclAlign(VD),
4236         PrivateHelpersTy(
4237             E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4238             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
4239     ++I;
4240     ++IElemInitRef;
4241   }
4242   I = Data.LastprivateCopies.begin();
4243   for (const Expr *E : Data.LastprivateVars) {
4244     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4245     Privates.emplace_back(
4246         C.getDeclAlign(VD),
4247         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4248                          /*PrivateElemInit=*/nullptr));
4249     ++I;
4250   }
4251   for (const VarDecl *VD : Data.PrivateLocals) {
4252     if (isAllocatableDecl(VD))
4253       Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
4254     else
4255       Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
4256   }
4257   llvm::stable_sort(Privates,
4258                     [](const PrivateDataTy &L, const PrivateDataTy &R) {
4259                       return L.first > R.first;
4260                     });
4261   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4262   // Build type kmp_routine_entry_t (if not built yet).
4263   emitKmpRoutineEntryT(KmpInt32Ty);
4264   // Build type kmp_task_t (if not built yet).
4265   if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
4266     if (SavedKmpTaskloopTQTy.isNull()) {
4267       SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4268           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4269     }
4270     KmpTaskTQTy = SavedKmpTaskloopTQTy;
4271   } else {
4272     assert((D.getDirectiveKind() == OMPD_task ||
4273             isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
4274             isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
4275            "Expected taskloop, task or target directive");
4276     if (SavedKmpTaskTQTy.isNull()) {
4277       SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4278           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4279     }
4280     KmpTaskTQTy = SavedKmpTaskTQTy;
4281   }
4282   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4283   // Build particular struct kmp_task_t for the given task.
4284   const RecordDecl *KmpTaskTWithPrivatesQTyRD =
4285       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
4286   QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
4287   QualType KmpTaskTWithPrivatesPtrQTy =
4288       C.getPointerType(KmpTaskTWithPrivatesQTy);
4289   llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
4290   llvm::Type *KmpTaskTWithPrivatesPtrTy =
4291       KmpTaskTWithPrivatesTy->getPointerTo();
4292   llvm::Value *KmpTaskTWithPrivatesTySize =
4293       CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
4294   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
4295 
4296   // Emit initial values for private copies (if any).
4297   llvm::Value *TaskPrivatesMap = nullptr;
4298   llvm::Type *TaskPrivatesMapTy =
4299       std::next(TaskFunction->arg_begin(), 3)->getType();
4300   if (!Privates.empty()) {
4301     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4302     TaskPrivatesMap =
4303         emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
4304     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4305         TaskPrivatesMap, TaskPrivatesMapTy);
4306   } else {
4307     TaskPrivatesMap = llvm::ConstantPointerNull::get(
4308         cast<llvm::PointerType>(TaskPrivatesMapTy));
4309   }
4310   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
4311   // kmp_task_t *tt);
4312   llvm::Function *TaskEntry = emitProxyTaskFunction(
4313       CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4314       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
4315       TaskPrivatesMap);
4316 
4317   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
4318   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
4319   // kmp_routine_entry_t *task_entry);
4320   // Task flags. Format is taken from
4321   // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
4322   // description of kmp_tasking_flags struct.
4323   enum {
4324     TiedFlag = 0x1,
4325     FinalFlag = 0x2,
4326     DestructorsFlag = 0x8,
4327     PriorityFlag = 0x20,
4328     DetachableFlag = 0x40,
4329   };
4330   unsigned Flags = Data.Tied ? TiedFlag : 0;
4331   bool NeedsCleanup = false;
4332   if (!Privates.empty()) {
4333     NeedsCleanup =
4334         checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
4335     if (NeedsCleanup)
4336       Flags = Flags | DestructorsFlag;
4337   }
4338   if (Data.Priority.getInt())
4339     Flags = Flags | PriorityFlag;
4340   if (D.hasClausesOfKind<OMPDetachClause>())
4341     Flags = Flags | DetachableFlag;
4342   llvm::Value *TaskFlags =
4343       Data.Final.getPointer()
4344           ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
4345                                      CGF.Builder.getInt32(FinalFlag),
4346                                      CGF.Builder.getInt32(/*C=*/0))
4347           : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
4348   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
4349   llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
4350   SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
4351       getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
4352       SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4353           TaskEntry, KmpRoutineEntryPtrTy)};
4354   llvm::Value *NewTask;
4355   if (D.hasClausesOfKind<OMPNowaitClause>()) {
4356     // Check if we have any device clause associated with the directive.
4357     const Expr *Device = nullptr;
4358     if (auto *C = D.getSingleClause<OMPDeviceClause>())
4359       Device = C->getDevice();
4360     // Emit device ID if any otherwise use default value.
4361     llvm::Value *DeviceID;
4362     if (Device)
4363       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
4364                                            CGF.Int64Ty, /*isSigned=*/true);
4365     else
4366       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
4367     AllocArgs.push_back(DeviceID);
4368     NewTask = CGF.EmitRuntimeCall(
4369         OMPBuilder.getOrCreateRuntimeFunction(
4370             CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
4371         AllocArgs);
4372   } else {
4373     NewTask =
4374         CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4375                                 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
4376                             AllocArgs);
4377   }
4378   // Emit detach clause initialization.
4379   // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
4380   // task_descriptor);
4381   if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
4382     const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
4383     LValue EvtLVal = CGF.EmitLValue(Evt);
4384 
4385     // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
4386     // int gtid, kmp_task_t *task);
4387     llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
4388     llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
4389     Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
4390     llvm::Value *EvtVal = CGF.EmitRuntimeCall(
4391         OMPBuilder.getOrCreateRuntimeFunction(
4392             CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
4393         {Loc, Tid, NewTask});
4394     EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
4395                                       Evt->getExprLoc());
4396     CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
4397   }
4398   // Process affinity clauses.
4399   if (D.hasClausesOfKind<OMPAffinityClause>()) {
4400     // Process list of affinity data.
4401     ASTContext &C = CGM.getContext();
4402     Address AffinitiesArray = Address::invalid();
4403     // Calculate number of elements to form the array of affinity data.
4404     llvm::Value *NumOfElements = nullptr;
4405     unsigned NumAffinities = 0;
4406     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4407       if (const Expr *Modifier = C->getModifier()) {
4408         const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
4409         for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4410           llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4411           Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4412           NumOfElements =
4413               NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
4414         }
4415       } else {
4416         NumAffinities += C->varlist_size();
4417       }
4418     }
4419     getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
4420     // Fields ids in kmp_task_affinity_info record.
4421     enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
4422 
4423     QualType KmpTaskAffinityInfoArrayTy;
4424     if (NumOfElements) {
4425       NumOfElements = CGF.Builder.CreateNUWAdd(
4426           llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
4427       auto *OVE = new (C) OpaqueValueExpr(
4428           Loc,
4429           C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
4430           VK_PRValue);
4431       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4432                                                     RValue::get(NumOfElements));
4433       KmpTaskAffinityInfoArrayTy =
4434           C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
4435                                  /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4436       // Properly emit variable-sized array.
4437       auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
4438                                            ImplicitParamDecl::Other);
4439       CGF.EmitVarDecl(*PD);
4440       AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
4441       NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4442                                                 /*isSigned=*/false);
4443     } else {
4444       KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
4445           KmpTaskAffinityInfoTy,
4446           llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
4447           ArrayType::Normal, /*IndexTypeQuals=*/0);
4448       AffinitiesArray =
4449           CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
4450       AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
4451       NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
4452                                              /*isSigned=*/false);
4453     }
4454 
4455     const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
4456     // Fill array by elements without iterators.
4457     unsigned Pos = 0;
4458     bool HasIterator = false;
4459     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4460       if (C->getModifier()) {
4461         HasIterator = true;
4462         continue;
4463       }
4464       for (const Expr *E : C->varlists()) {
4465         llvm::Value *Addr;
4466         llvm::Value *Size;
4467         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4468         LValue Base =
4469             CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
4470                                KmpTaskAffinityInfoTy);
4471         // affs[i].base_addr = &<Affinities[i].second>;
4472         LValue BaseAddrLVal = CGF.EmitLValueForField(
4473             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4474         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4475                               BaseAddrLVal);
4476         // affs[i].len = sizeof(<Affinities[i].second>);
4477         LValue LenLVal = CGF.EmitLValueForField(
4478             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4479         CGF.EmitStoreOfScalar(Size, LenLVal);
4480         ++Pos;
4481       }
4482     }
4483     LValue PosLVal;
4484     if (HasIterator) {
4485       PosLVal = CGF.MakeAddrLValue(
4486           CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
4487           C.getSizeType());
4488       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4489     }
4490     // Process elements with iterators.
4491     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4492       const Expr *Modifier = C->getModifier();
4493       if (!Modifier)
4494         continue;
4495       OMPIteratorGeneratorScope IteratorScope(
4496           CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
4497       for (const Expr *E : C->varlists()) {
4498         llvm::Value *Addr;
4499         llvm::Value *Size;
4500         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4501         llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4502         LValue Base = CGF.MakeAddrLValue(
4503             CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
4504         // affs[i].base_addr = &<Affinities[i].second>;
4505         LValue BaseAddrLVal = CGF.EmitLValueForField(
4506             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4507         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4508                               BaseAddrLVal);
4509         // affs[i].len = sizeof(<Affinities[i].second>);
4510         LValue LenLVal = CGF.EmitLValueForField(
4511             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4512         CGF.EmitStoreOfScalar(Size, LenLVal);
4513         Idx = CGF.Builder.CreateNUWAdd(
4514             Idx, llvm::ConstantInt::get(Idx->getType(), 1));
4515         CGF.EmitStoreOfScalar(Idx, PosLVal);
4516       }
4517     }
4518     // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
4519     // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
4520     // naffins, kmp_task_affinity_info_t *affin_list);
4521     llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
4522     llvm::Value *GTid = getThreadID(CGF, Loc);
4523     llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4524         AffinitiesArray.getPointer(), CGM.VoidPtrTy);
4525     // FIXME: Emit the function and ignore its result for now unless the
4526     // runtime function is properly implemented.
4527     (void)CGF.EmitRuntimeCall(
4528         OMPBuilder.getOrCreateRuntimeFunction(
4529             CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
4530         {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
4531   }
4532   llvm::Value *NewTaskNewTaskTTy =
4533       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4534           NewTask, KmpTaskTWithPrivatesPtrTy);
4535   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4536                                                KmpTaskTWithPrivatesQTy);
4537   LValue TDBase =
4538       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4539   // Fill the data in the resulting kmp_task_t record.
4540   // Copy shareds if there are any.
4541   Address KmpTaskSharedsPtr = Address::invalid();
4542   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4543     KmpTaskSharedsPtr = Address::deprecated(
4544         CGF.EmitLoadOfScalar(
4545             CGF.EmitLValueForField(
4546                 TDBase,
4547                 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
4548             Loc),
4549         CGM.getNaturalTypeAlignment(SharedsTy));
4550     LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4551     LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4552     CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4553   }
4554   // Emit initial values for private copies (if any).
4555   TaskResultTy Result;
4556   if (!Privates.empty()) {
4557     emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4558                      SharedsTy, SharedsPtrTy, Data, Privates,
4559                      /*ForDup=*/false);
4560     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4561         (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4562       Result.TaskDupFn = emitTaskDupFunction(
4563           CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4564           KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4565           /*WithLastIter=*/!Data.LastprivateVars.empty());
4566     }
4567   }
4568   // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4569   enum { Priority = 0, Destructors = 1 };
4570   // Provide pointer to function with destructors for privates.
4571   auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4572   const RecordDecl *KmpCmplrdataUD =
4573       (*FI)->getType()->getAsUnionType()->getDecl();
4574   if (NeedsCleanup) {
4575     llvm::Value *DestructorFn = emitDestructorsFunction(
4576         CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4577         KmpTaskTWithPrivatesQTy);
4578     LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4579     LValue DestructorsLV = CGF.EmitLValueForField(
4580         Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4581     CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4582                               DestructorFn, KmpRoutineEntryPtrTy),
4583                           DestructorsLV);
4584   }
4585   // Set priority.
4586   if (Data.Priority.getInt()) {
4587     LValue Data2LV = CGF.EmitLValueForField(
4588         TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4589     LValue PriorityLV = CGF.EmitLValueForField(
4590         Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4591     CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4592   }
4593   Result.NewTask = NewTask;
4594   Result.TaskEntry = TaskEntry;
4595   Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4596   Result.TDBase = TDBase;
4597   Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4598   return Result;
4599 }
4600 
namespace {
/// Dependence kinds as encoded for the runtime library. The enumerator value
/// is what gets stored into the flags field of a kmp_depend_info element.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  /// Used for both 'out' and 'inout' dependencies.
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4,
  DepInOutSet = 0x8
};
/// Indices of the fields in the kmp_depend_info record, in declaration order.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4612 
4613 /// Translates internal dependency kind into the runtime kind.
4614 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4615   RTLDependenceKindTy DepKind;
4616   switch (K) {
4617   case OMPC_DEPEND_in:
4618     DepKind = DepIn;
4619     break;
4620   // Out and InOut dependencies must use the same code.
4621   case OMPC_DEPEND_out:
4622   case OMPC_DEPEND_inout:
4623     DepKind = DepInOut;
4624     break;
4625   case OMPC_DEPEND_mutexinoutset:
4626     DepKind = DepMutexInOutSet;
4627     break;
4628   case OMPC_DEPEND_inoutset:
4629     DepKind = DepInOutSet;
4630     break;
4631   case OMPC_DEPEND_source:
4632   case OMPC_DEPEND_sink:
4633   case OMPC_DEPEND_depobj:
4634   case OMPC_DEPEND_unknown:
4635     llvm_unreachable("Unknown task dependence type");
4636   }
4637   return DepKind;
4638 }
4639 
4640 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4641 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4642                            QualType &FlagsTy) {
4643   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4644   if (KmpDependInfoTy.isNull()) {
4645     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4646     KmpDependInfoRD->startDefinition();
4647     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4648     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4649     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4650     KmpDependInfoRD->completeDefinition();
4651     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4652   }
4653 }
4654 
4655 std::pair<llvm::Value *, LValue>
4656 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4657                                    SourceLocation Loc) {
4658   ASTContext &C = CGM.getContext();
4659   QualType FlagsTy;
4660   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4661   RecordDecl *KmpDependInfoRD =
4662       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4663   LValue Base = CGF.EmitLoadOfPointerLValue(
4664       DepobjLVal.getAddress(CGF),
4665       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4666   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4667   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4668       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4669       CGF.ConvertTypeForMem(KmpDependInfoTy));
4670   Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4671                             Base.getTBAAInfo());
4672   Address DepObjAddr = CGF.Builder.CreateGEP(
4673       Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4674   LValue NumDepsBase = CGF.MakeAddrLValue(
4675       DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4676   // NumDeps = deps[i].base_addr;
4677   LValue BaseAddrLVal = CGF.EmitLValueForField(
4678       NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4679   llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4680   return std::make_pair(NumDeps, Base);
4681 }
4682 
4683 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4684                            llvm::PointerUnion<unsigned *, LValue *> Pos,
4685                            const OMPTaskDataTy::DependData &Data,
4686                            Address DependenciesArray) {
4687   CodeGenModule &CGM = CGF.CGM;
4688   ASTContext &C = CGM.getContext();
4689   QualType FlagsTy;
4690   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4691   RecordDecl *KmpDependInfoRD =
4692       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4693   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4694 
4695   OMPIteratorGeneratorScope IteratorScope(
4696       CGF, cast_or_null<OMPIteratorExpr>(
4697                Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4698                                  : nullptr));
4699   for (const Expr *E : Data.DepExprs) {
4700     llvm::Value *Addr;
4701     llvm::Value *Size;
4702     std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4703     LValue Base;
4704     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4705       Base = CGF.MakeAddrLValue(
4706           CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4707     } else {
4708       LValue &PosLVal = *Pos.get<LValue *>();
4709       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4710       Base = CGF.MakeAddrLValue(
4711           CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
4712     }
4713     // deps[i].base_addr = &<Dependencies[i].second>;
4714     LValue BaseAddrLVal = CGF.EmitLValueForField(
4715         Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4716     CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4717                           BaseAddrLVal);
4718     // deps[i].len = sizeof(<Dependencies[i].second>);
4719     LValue LenLVal = CGF.EmitLValueForField(
4720         Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4721     CGF.EmitStoreOfScalar(Size, LenLVal);
4722     // deps[i].flags = <Dependencies[i].first>;
4723     RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4724     LValue FlagsLVal = CGF.EmitLValueForField(
4725         Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4726     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4727                           FlagsLVal);
4728     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4729       ++(*P);
4730     } else {
4731       LValue &PosLVal = *Pos.get<LValue *>();
4732       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4733       Idx = CGF.Builder.CreateNUWAdd(Idx,
4734                                      llvm::ConstantInt::get(Idx->getType(), 1));
4735       CGF.EmitStoreOfScalar(Idx, PosLVal);
4736     }
4737   }
4738 }
4739 
4740 static SmallVector<llvm::Value *, 4>
4741 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4742                         const OMPTaskDataTy::DependData &Data) {
4743   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4744          "Expected depobj dependecy kind.");
4745   SmallVector<llvm::Value *, 4> Sizes;
4746   SmallVector<LValue, 4> SizeLVals;
4747   ASTContext &C = CGF.getContext();
4748   QualType FlagsTy;
4749   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4750   RecordDecl *KmpDependInfoRD =
4751       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4752   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4753   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4754   {
4755     OMPIteratorGeneratorScope IteratorScope(
4756         CGF, cast_or_null<OMPIteratorExpr>(
4757                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4758                                    : nullptr));
4759     for (const Expr *E : Data.DepExprs) {
4760       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4761       LValue Base = CGF.EmitLoadOfPointerLValue(
4762           DepobjLVal.getAddress(CGF),
4763           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4764       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4765           Base.getAddress(CGF), KmpDependInfoPtrT,
4766           CGF.ConvertTypeForMem(KmpDependInfoTy));
4767       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4768                                 Base.getTBAAInfo());
4769       Address DepObjAddr = CGF.Builder.CreateGEP(
4770           Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4771       LValue NumDepsBase = CGF.MakeAddrLValue(
4772           DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4773       // NumDeps = deps[i].base_addr;
4774       LValue BaseAddrLVal = CGF.EmitLValueForField(
4775           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4776       llvm::Value *NumDeps =
4777           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4778       LValue NumLVal = CGF.MakeAddrLValue(
4779           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4780           C.getUIntPtrType());
4781       CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4782                               NumLVal.getAddress(CGF));
4783       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4784       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4785       CGF.EmitStoreOfScalar(Add, NumLVal);
4786       SizeLVals.push_back(NumLVal);
4787     }
4788   }
4789   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4790     llvm::Value *Size =
4791         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4792     Sizes.push_back(Size);
4793   }
4794   return Sizes;
4795 }
4796 
4797 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4798                                LValue PosLVal,
4799                                const OMPTaskDataTy::DependData &Data,
4800                                Address DependenciesArray) {
4801   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4802          "Expected depobj dependecy kind.");
4803   ASTContext &C = CGF.getContext();
4804   QualType FlagsTy;
4805   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4806   RecordDecl *KmpDependInfoRD =
4807       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4808   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4809   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4810   llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4811   {
4812     OMPIteratorGeneratorScope IteratorScope(
4813         CGF, cast_or_null<OMPIteratorExpr>(
4814                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4815                                    : nullptr));
4816     for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4817       const Expr *E = Data.DepExprs[I];
4818       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4819       LValue Base = CGF.EmitLoadOfPointerLValue(
4820           DepobjLVal.getAddress(CGF),
4821           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4822       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4823           Base.getAddress(CGF), KmpDependInfoPtrT,
4824           CGF.ConvertTypeForMem(KmpDependInfoTy));
4825       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4826                                 Base.getTBAAInfo());
4827 
4828       // Get number of elements in a single depobj.
4829       Address DepObjAddr = CGF.Builder.CreateGEP(
4830           Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4831       LValue NumDepsBase = CGF.MakeAddrLValue(
4832           DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4833       // NumDeps = deps[i].base_addr;
4834       LValue BaseAddrLVal = CGF.EmitLValueForField(
4835           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4836       llvm::Value *NumDeps =
4837           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4838 
4839       // memcopy dependency data.
4840       llvm::Value *Size = CGF.Builder.CreateNUWMul(
4841           ElSize,
4842           CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4843       llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4844       Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
4845       CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4846 
4847       // Increase pos.
4848       // pos += size;
4849       llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4850       CGF.EmitStoreOfScalar(Add, PosLVal);
4851     }
4852   }
4853 }
4854 
4855 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4856     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4857     SourceLocation Loc) {
4858   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4859         return D.DepExprs.empty();
4860       }))
4861     return std::make_pair(nullptr, Address::invalid());
4862   // Process list of dependencies.
4863   ASTContext &C = CGM.getContext();
4864   Address DependenciesArray = Address::invalid();
4865   llvm::Value *NumOfElements = nullptr;
4866   unsigned NumDependencies = std::accumulate(
4867       Dependencies.begin(), Dependencies.end(), 0,
4868       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4869         return D.DepKind == OMPC_DEPEND_depobj
4870                    ? V
4871                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4872       });
4873   QualType FlagsTy;
4874   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4875   bool HasDepobjDeps = false;
4876   bool HasRegularWithIterators = false;
4877   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4878   llvm::Value *NumOfRegularWithIterators =
4879       llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4880   // Calculate number of depobj dependecies and regular deps with the iterators.
4881   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4882     if (D.DepKind == OMPC_DEPEND_depobj) {
4883       SmallVector<llvm::Value *, 4> Sizes =
4884           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4885       for (llvm::Value *Size : Sizes) {
4886         NumOfDepobjElements =
4887             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4888       }
4889       HasDepobjDeps = true;
4890       continue;
4891     }
4892     // Include number of iterations, if any.
4893 
4894     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4895       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4896         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4897         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4898         llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4899             Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4900         NumOfRegularWithIterators =
4901             CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4902       }
4903       HasRegularWithIterators = true;
4904       continue;
4905     }
4906   }
4907 
4908   QualType KmpDependInfoArrayTy;
4909   if (HasDepobjDeps || HasRegularWithIterators) {
4910     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4911                                            /*isSigned=*/false);
4912     if (HasDepobjDeps) {
4913       NumOfElements =
4914           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4915     }
4916     if (HasRegularWithIterators) {
4917       NumOfElements =
4918           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4919     }
4920     auto *OVE = new (C) OpaqueValueExpr(
4921         Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4922         VK_PRValue);
4923     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4924                                                   RValue::get(NumOfElements));
4925     KmpDependInfoArrayTy =
4926         C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
4927                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4928     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4929     // Properly emit variable-sized array.
4930     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4931                                          ImplicitParamDecl::Other);
4932     CGF.EmitVarDecl(*PD);
4933     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4934     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4935                                               /*isSigned=*/false);
4936   } else {
4937     KmpDependInfoArrayTy = C.getConstantArrayType(
4938         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4939         ArrayType::Normal, /*IndexTypeQuals=*/0);
4940     DependenciesArray =
4941         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4942     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4943     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4944                                            /*isSigned=*/false);
4945   }
4946   unsigned Pos = 0;
4947   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4948     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4949         Dependencies[I].IteratorExpr)
4950       continue;
4951     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4952                    DependenciesArray);
4953   }
4954   // Copy regular dependecies with iterators.
4955   LValue PosLVal = CGF.MakeAddrLValue(
4956       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4957   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4958   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4959     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4960         !Dependencies[I].IteratorExpr)
4961       continue;
4962     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4963                    DependenciesArray);
4964   }
4965   // Copy final depobj arrays without iterators.
4966   if (HasDepobjDeps) {
4967     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4968       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4969         continue;
4970       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4971                          DependenciesArray);
4972     }
4973   }
4974   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4975       DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4976   return std::make_pair(NumOfElements, DependenciesArray);
4977 }
4978 
4979 Address CGOpenMPRuntime::emitDepobjDependClause(
4980     CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4981     SourceLocation Loc) {
4982   if (Dependencies.DepExprs.empty())
4983     return Address::invalid();
4984   // Process list of dependencies.
4985   ASTContext &C = CGM.getContext();
4986   Address DependenciesArray = Address::invalid();
4987   unsigned NumDependencies = Dependencies.DepExprs.size();
4988   QualType FlagsTy;
4989   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4990   RecordDecl *KmpDependInfoRD =
4991       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4992 
4993   llvm::Value *Size;
4994   // Define type kmp_depend_info[<Dependencies.size()>];
4995   // For depobj reserve one extra element to store the number of elements.
4996   // It is required to handle depobj(x) update(in) construct.
4997   // kmp_depend_info[<Dependencies.size()>] deps;
4998   llvm::Value *NumDepsVal;
4999   CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
5000   if (const auto *IE =
5001           cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
5002     NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
5003     for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
5004       llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
5005       Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
5006       NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
5007     }
5008     Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
5009                                     NumDepsVal);
5010     CharUnits SizeInBytes =
5011         C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
5012     llvm::Value *RecSize = CGM.getSize(SizeInBytes);
5013     Size = CGF.Builder.CreateNUWMul(Size, RecSize);
5014     NumDepsVal =
5015         CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
5016   } else {
5017     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
5018         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
5019         nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
5020     CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
5021     Size = CGM.getSize(Sz.alignTo(Align));
5022     NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
5023   }
5024   // Need to allocate on the dynamic memory.
5025   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5026   // Use default allocator.
5027   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5028   llvm::Value *Args[] = {ThreadID, Size, Allocator};
5029 
5030   llvm::Value *Addr =
5031       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5032                               CGM.getModule(), OMPRTL___kmpc_alloc),
5033                           Args, ".dep.arr.addr");
5034   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5035       Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
5036   DependenciesArray = Address::deprecated(Addr, Align);
5037   // Write number of elements in the first element of array for depobj.
5038   LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
5039   // deps[i].base_addr = NumDependencies;
5040   LValue BaseAddrLVal = CGF.EmitLValueForField(
5041       Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5042   CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
5043   llvm::PointerUnion<unsigned *, LValue *> Pos;
5044   unsigned Idx = 1;
5045   LValue PosLVal;
5046   if (Dependencies.IteratorExpr) {
5047     PosLVal = CGF.MakeAddrLValue(
5048         CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
5049         C.getSizeType());
5050     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
5051                           /*IsInit=*/true);
5052     Pos = &PosLVal;
5053   } else {
5054     Pos = &Idx;
5055   }
5056   emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
5057   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5058       CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
5059       CGF.Int8Ty);
5060   return DependenciesArray;
5061 }
5062 
5063 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5064                                         SourceLocation Loc) {
5065   ASTContext &C = CGM.getContext();
5066   QualType FlagsTy;
5067   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5068   LValue Base = CGF.EmitLoadOfPointerLValue(
5069       DepobjLVal.getAddress(CGF),
5070       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5071   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5072   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5073       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
5074       CGF.ConvertTypeForMem(KmpDependInfoTy));
5075   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5076       Addr.getElementType(), Addr.getPointer(),
5077       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5078   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5079                                                                CGF.VoidPtrTy);
5080   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5081   // Use default allocator.
5082   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5083   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5084 
5085   // _kmpc_free(gtid, addr, nullptr);
5086   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5087                                 CGM.getModule(), OMPRTL___kmpc_free),
5088                             Args);
5089 }
5090 
5091 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
5092                                        OpenMPDependClauseKind NewDepKind,
5093                                        SourceLocation Loc) {
5094   ASTContext &C = CGM.getContext();
5095   QualType FlagsTy;
5096   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5097   RecordDecl *KmpDependInfoRD =
5098       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5099   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5100   llvm::Value *NumDeps;
5101   LValue Base;
5102   std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
5103 
5104   Address Begin = Base.getAddress(CGF);
5105   // Cast from pointer to array type to pointer to single element.
5106   llvm::Value *End = CGF.Builder.CreateGEP(
5107       Begin.getElementType(), Begin.getPointer(), NumDeps);
5108   // The basic structure here is a while-do loop.
5109   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
5110   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
5111   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5112   CGF.EmitBlock(BodyBB);
5113   llvm::PHINode *ElementPHI =
5114       CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
5115   ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
5116   Begin = Begin.withPointer(ElementPHI);
5117   Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
5118                             Base.getTBAAInfo());
5119   // deps[i].flags = NewDepKind;
5120   RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
5121   LValue FlagsLVal = CGF.EmitLValueForField(
5122       Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5123   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5124                         FlagsLVal);
5125 
5126   // Shift the address forward by one element.
5127   Address ElementNext =
5128       CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
5129   ElementPHI->addIncoming(ElementNext.getPointer(),
5130                           CGF.Builder.GetInsertBlock());
5131   llvm::Value *IsEmpty =
5132       CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
5133   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5134   // Done.
5135   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5136 }
5137 
5138 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5139                                    const OMPExecutableDirective &D,
5140                                    llvm::Function *TaskFunction,
5141                                    QualType SharedsTy, Address Shareds,
5142                                    const Expr *IfCond,
5143                                    const OMPTaskDataTy &Data) {
5144   if (!CGF.HaveInsertPoint())
5145     return;
5146 
5147   TaskResultTy Result =
5148       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5149   llvm::Value *NewTask = Result.NewTask;
5150   llvm::Function *TaskEntry = Result.TaskEntry;
5151   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5152   LValue TDBase = Result.TDBase;
5153   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5154   // Process list of dependences.
5155   Address DependenciesArray = Address::invalid();
5156   llvm::Value *NumOfElements;
5157   std::tie(NumOfElements, DependenciesArray) =
5158       emitDependClause(CGF, Data.Dependences, Loc);
5159 
5160   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5161   // libcall.
5162   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5163   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5164   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5165   // list is not empty
5166   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5167   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5168   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5169   llvm::Value *DepTaskArgs[7];
5170   if (!Data.Dependences.empty()) {
5171     DepTaskArgs[0] = UpLoc;
5172     DepTaskArgs[1] = ThreadID;
5173     DepTaskArgs[2] = NewTask;
5174     DepTaskArgs[3] = NumOfElements;
5175     DepTaskArgs[4] = DependenciesArray.getPointer();
5176     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5177     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5178   }
5179   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
5180                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5181     if (!Data.Tied) {
5182       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5183       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5184       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5185     }
5186     if (!Data.Dependences.empty()) {
5187       CGF.EmitRuntimeCall(
5188           OMPBuilder.getOrCreateRuntimeFunction(
5189               CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
5190           DepTaskArgs);
5191     } else {
5192       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5193                               CGM.getModule(), OMPRTL___kmpc_omp_task),
5194                           TaskArgs);
5195     }
5196     // Check if parent region is untied and build return for untied task;
5197     if (auto *Region =
5198             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5199       Region->emitUntiedSwitch(CGF);
5200   };
5201 
5202   llvm::Value *DepWaitTaskArgs[6];
5203   if (!Data.Dependences.empty()) {
5204     DepWaitTaskArgs[0] = UpLoc;
5205     DepWaitTaskArgs[1] = ThreadID;
5206     DepWaitTaskArgs[2] = NumOfElements;
5207     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5208     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5209     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5210   }
5211   auto &M = CGM.getModule();
5212   auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
5213                         TaskEntry, &Data, &DepWaitTaskArgs,
5214                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5215     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5216     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5217     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5218     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5219     // is specified.
5220     if (!Data.Dependences.empty())
5221       CGF.EmitRuntimeCall(
5222           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
5223           DepWaitTaskArgs);
5224     // Call proxy_task_entry(gtid, new_task);
5225     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5226                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5227       Action.Enter(CGF);
5228       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5229       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5230                                                           OutlinedFnArgs);
5231     };
5232 
5233     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5234     // kmp_task_t *new_task);
5235     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5236     // kmp_task_t *new_task);
5237     RegionCodeGenTy RCG(CodeGen);
5238     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
5239                               M, OMPRTL___kmpc_omp_task_begin_if0),
5240                           TaskArgs,
5241                           OMPBuilder.getOrCreateRuntimeFunction(
5242                               M, OMPRTL___kmpc_omp_task_complete_if0),
5243                           TaskArgs);
5244     RCG.setAction(Action);
5245     RCG(CGF);
5246   };
5247 
5248   if (IfCond) {
5249     emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5250   } else {
5251     RegionCodeGenTy ThenRCG(ThenCodeGen);
5252     ThenRCG(CGF);
5253   }
5254 }
5255 
5256 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5257                                        const OMPLoopDirective &D,
5258                                        llvm::Function *TaskFunction,
5259                                        QualType SharedsTy, Address Shareds,
5260                                        const Expr *IfCond,
5261                                        const OMPTaskDataTy &Data) {
5262   if (!CGF.HaveInsertPoint())
5263     return;
5264   TaskResultTy Result =
5265       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5266   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5267   // libcall.
5268   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5269   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5270   // sched, kmp_uint64 grainsize, void *task_dup);
5271   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5272   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5273   llvm::Value *IfVal;
5274   if (IfCond) {
5275     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5276                                       /*isSigned=*/true);
5277   } else {
5278     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5279   }
5280 
5281   LValue LBLVal = CGF.EmitLValueForField(
5282       Result.TDBase,
5283       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5284   const auto *LBVar =
5285       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5286   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5287                        LBLVal.getQuals(),
5288                        /*IsInitializer=*/true);
5289   LValue UBLVal = CGF.EmitLValueForField(
5290       Result.TDBase,
5291       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5292   const auto *UBVar =
5293       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5294   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5295                        UBLVal.getQuals(),
5296                        /*IsInitializer=*/true);
5297   LValue StLVal = CGF.EmitLValueForField(
5298       Result.TDBase,
5299       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5300   const auto *StVar =
5301       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5302   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5303                        StLVal.getQuals(),
5304                        /*IsInitializer=*/true);
5305   // Store reductions address.
5306   LValue RedLVal = CGF.EmitLValueForField(
5307       Result.TDBase,
5308       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5309   if (Data.Reductions) {
5310     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5311   } else {
5312     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5313                                CGF.getContext().VoidPtrTy);
5314   }
5315   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5316   llvm::Value *TaskArgs[] = {
5317       UpLoc,
5318       ThreadID,
5319       Result.NewTask,
5320       IfVal,
5321       LBLVal.getPointer(CGF),
5322       UBLVal.getPointer(CGF),
5323       CGF.EmitLoadOfScalar(StLVal, Loc),
5324       llvm::ConstantInt::getSigned(
5325           CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5326       llvm::ConstantInt::getSigned(
5327           CGF.IntTy, Data.Schedule.getPointer()
5328                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5329                          : NoSchedule),
5330       Data.Schedule.getPointer()
5331           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5332                                       /*isSigned=*/false)
5333           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5334       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5335                              Result.TaskDupFn, CGF.VoidPtrTy)
5336                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5337   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5338                           CGM.getModule(), OMPRTL___kmpc_taskloop),
5339                       TaskArgs);
5340 }
5341 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
///
/// The emitted code walks the LHS and RHS arrays in lockstep. The element
/// count and the end-of-range pointer are derived from \p LHSVar only. For
/// every element, \p LHSVar and \p RHSVar are temporarily remapped to the
/// addresses of the current elements and \p RedOpGen is invoked.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation.
/// NOTE(review): the previous text described this exactly like LHSVar
/// ("element of array in original variable"); presumably it refers to an
/// element of the private copy instead - confirm against the callers.
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr Forwarded unchanged to \p RedOpGen as its first expression.
/// \param EExpr Forwarded unchanged to \p RedOpGen as its second expression.
/// \param UpExpr Forwarded unchanged to \p RedOpGen as its third expression.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element reduction.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type and compute the number of elements.
  // Only the LHS address is passed here; ElementTy is filled in by the call.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // End of the LHS range: LHSBegin advanced by NumElements. The loop below
  // terminates when the LHS cursor reaches this pointer.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The loop is guarded by an up-front emptiness check and tests its exit
  // condition at the bottom of the body.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  // EntryBB is the block holding the branch above; it provides the first
  // incoming edge of the PHI nodes created below.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // Cursor over the RHS elements.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent = Address::deprecated(
      RHSElementPHI,
      RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Cursor over the LHS elements.
  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent = Address::deprecated(
      LHSElementPHI,
      LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit the reduction operation for the current pair of elements: remap
  // LHSVar/RHSVar to the element addresses for the duration of RedOpGen.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, LHSElementCurrent);
  Scope.addPrivate(RHSVar, RHSElementCurrent);
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The back edge comes from the current insert block rather than BodyBB
  // (presumably because RedOpGen may have emitted additional blocks).
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5424 
5425 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5426 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5427 /// UDR combiner function.
5428 static void emitReductionCombiner(CodeGenFunction &CGF,
5429                                   const Expr *ReductionOp) {
5430   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5431     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5432       if (const auto *DRE =
5433               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5434         if (const auto *DRD =
5435                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5436           std::pair<llvm::Function *, llvm::Function *> Reduction =
5437               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5438           RValue Func = RValue::get(Reduction.first);
5439           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5440           CGF.EmitIgnoredExpr(ReductionOp);
5441           return;
5442         }
5443   CGF.EmitIgnoredExpr(ReductionOp);
5444 }
5445 
5446 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5447     SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5448     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5449     ArrayRef<const Expr *> ReductionOps) {
5450   ASTContext &C = CGM.getContext();
5451 
5452   // void reduction_func(void *LHSArg, void *RHSArg);
5453   FunctionArgList Args;
5454   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5455                            ImplicitParamDecl::Other);
5456   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5457                            ImplicitParamDecl::Other);
5458   Args.push_back(&LHSArg);
5459   Args.push_back(&RHSArg);
5460   const auto &CGFI =
5461       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5462   std::string Name = getName({"omp", "reduction", "reduction_func"});
5463   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5464                                     llvm::GlobalValue::InternalLinkage, Name,
5465                                     &CGM.getModule());
5466   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5467   Fn->setDoesNotRecurse();
5468   CodeGenFunction CGF(CGM);
5469   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5470 
5471   // Dst = (void*[n])(LHSArg);
5472   // Src = (void*[n])(RHSArg);
5473   Address LHS = Address::deprecated(
5474       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5475           CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), ArgsType),
5476       CGF.getPointerAlign());
5477   Address RHS = Address::deprecated(
5478       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5479           CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), ArgsType),
5480       CGF.getPointerAlign());
5481 
5482   //  ...
5483   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5484   //  ...
5485   CodeGenFunction::OMPPrivateScope Scope(CGF);
5486   const auto *IPriv = Privates.begin();
5487   unsigned Idx = 0;
5488   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5489     const auto *RHSVar =
5490         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5491     Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
5492     const auto *LHSVar =
5493         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5494     Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
5495     QualType PrivTy = (*IPriv)->getType();
5496     if (PrivTy->isVariablyModifiedType()) {
5497       // Get array size and emit VLA type.
5498       ++Idx;
5499       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5500       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5501       const VariableArrayType *VLA =
5502           CGF.getContext().getAsVariableArrayType(PrivTy);
5503       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5504       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5505           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5506       CGF.EmitVariablyModifiedType(PrivTy);
5507     }
5508   }
5509   Scope.Privatize();
5510   IPriv = Privates.begin();
5511   const auto *ILHS = LHSExprs.begin();
5512   const auto *IRHS = RHSExprs.begin();
5513   for (const Expr *E : ReductionOps) {
5514     if ((*IPriv)->getType()->isArrayType()) {
5515       // Emit reduction for array section.
5516       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5517       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5518       EmitOMPAggregateReduction(
5519           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5520           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5521             emitReductionCombiner(CGF, E);
5522           });
5523     } else {
5524       // Emit reduction for array subscript or single variable.
5525       emitReductionCombiner(CGF, E);
5526     }
5527     ++IPriv;
5528     ++ILHS;
5529     ++IRHS;
5530   }
5531   Scope.ForceCleanup();
5532   CGF.FinishFunction();
5533   return Fn;
5534 }
5535 
5536 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5537                                                   const Expr *ReductionOp,
5538                                                   const Expr *PrivateRef,
5539                                                   const DeclRefExpr *LHS,
5540                                                   const DeclRefExpr *RHS) {
5541   if (PrivateRef->getType()->isArrayType()) {
5542     // Emit reduction for array section.
5543     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5544     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5545     EmitOMPAggregateReduction(
5546         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5547         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5548           emitReductionCombiner(CGF, ReductionOp);
5549         });
5550   } else {
5551     // Emit reduction for array subscript or single variable.
5552     emitReductionCombiner(CGF, ReductionOp);
5553   }
5554 }
5555 
/// Emits the code that finalizes a reduction.
///
/// Nothing is emitted when \p CGF has no insert point. With
/// Options.SimpleReduction set, only the combiners are emitted inline.
/// Otherwise the function builds the list of reduction items, emits the
/// outlined reduce_func, calls __kmpc_reduce{_nowait} and switches on its
/// result: case 1 runs the plain combiners, case 2 runs atomic (or critical)
/// versions of them.
/// \param CGF Function the code is emitted into.
/// \param Loc Source location used for the runtime calls.
/// \param Privates Private copies; their types select scalar/array/VLA
/// handling.
/// \param LHSExprs References to the variables used as LHS of the combiners.
/// \param RHSExprs References to the variables used as RHS of the combiners;
/// their addresses are what is stored in the reduction list.
/// \param ReductionOps Combiner expressions, one per reduction item.
/// \param Options WithNowait selects the *_nowait runtime entry points;
/// SimpleReduction selects the inline-only form.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // Inline form: emit the combiners one after another; no runtime calls.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  // The list has one slot per RHS expression plus one extra slot for every
  // variably modified private type.
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  // Idx is the slot index in RedList; it runs ahead of I whenever a size slot
  // has been inserted.
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    // Store the address of the RHS item as a void pointer.
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // The element count is converted to size_t and stored in the next slot
      // encoded as a pointer (inttoptr).
      // NOTE: this local 'Size' shadows the slot counter declared above.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  // Note that <n> is the number of RHS expressions, while sizeof(RedList)
  // also covers the extra size slots reserved for VLA items.
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  // Any result other than 1 or 2 goes straight to the default block.
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Region body for case 1: the plain (non-atomic) combiners. The runtime
  // object is fetched through CGF because 'this' is not captured.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // The action carries the __kmpc_end_reduce{_nowait} callee and EndArgs; the
  // first two arguments are left empty (presumably the enter callee and its
  // arguments - confirm against CommonActionTy).
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  // Region body for case 2. Every combiner of the form 'X = <update>' is
  // emitted through the atomic update helper; everything else is emitted
  // inside a critical region.
  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // XExpr: assignment target, UpExpr: whole update expression,
      // EExpr: right operand of the binary operator found in the update.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      // BO_Comma is the initial value; it is replaced below when a binary
      // operator is found in the update expression.
      BinaryOperatorKind BO = BO_Comma;
      // NOTE: the pointer 'BO' declared in the condition below shadows the
      // operator kind 'BO' above for the duration of this if statement.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        // Emits one atomic update of X with monotonic ordering. The callback
        // passed to the helper stores the given value of X into a temporary,
        // remaps VD to that temporary and evaluates the update expression.
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                CGF.emitOMPSimpleStore(
                    CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                    VD->getType().getNonReferenceType(), Loc);
                PrivateScope.addPrivate(VD, LHSTemp);
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        // The combiner is not a plain assignment, so it is emitted unchanged
        // inside a critical region named via getName({"atomic_reduction"}).
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Array section: one critical region per pair of elements.
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  // Only the blocking form attaches __kmpc_end_reduce to the atomic region;
  // with nowait the region is emitted without an action.
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  // Both cases fall through to the default block, which ends the switch.
  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5859 
5860 /// Generates unique name for artificial threadprivate variables.
5861 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5862 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5863                                       const Expr *Ref) {
5864   SmallString<256> Buffer;
5865   llvm::raw_svector_ostream Out(Buffer);
5866   const clang::DeclRefExpr *DE;
5867   const VarDecl *D = ::getBaseDecl(Ref, DE);
5868   if (!D)
5869     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5870   D = D->getCanonicalDecl();
5871   std::string Name = CGM.getOpenMPRuntime().getName(
5872       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5873   Out << Prefix << Name << "_"
5874       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5875   return std::string(Out.str());
5876 }
5877 
5878 /// Emits reduction initializer function:
5879 /// \code
5880 /// void @.red_init(void* %arg, void* %orig) {
5881 /// %0 = bitcast void* %arg to <type>*
5882 /// store <type> <init>, <type>* %0
5883 /// ret void
5884 /// }
5885 /// \endcode
5886 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5887                                            SourceLocation Loc,
5888                                            ReductionCodeGen &RCG, unsigned N) {
5889   ASTContext &C = CGM.getContext();
5890   QualType VoidPtrTy = C.VoidPtrTy;
5891   VoidPtrTy.addRestrict();
5892   FunctionArgList Args;
5893   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5894                           ImplicitParamDecl::Other);
5895   ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5896                               ImplicitParamDecl::Other);
5897   Args.emplace_back(&Param);
5898   Args.emplace_back(&ParamOrig);
5899   const auto &FnInfo =
5900       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5901   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5902   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5903   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5904                                     Name, &CGM.getModule());
5905   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5906   Fn->setDoesNotRecurse();
5907   CodeGenFunction CGF(CGM);
5908   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5909   Address PrivateAddr = CGF.EmitLoadOfPointer(
5910       CGF.GetAddrOfLocalVar(&Param),
5911       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5912   llvm::Value *Size = nullptr;
5913   // If the size of the reduction item is non-constant, load it from global
5914   // threadprivate variable.
5915   if (RCG.getSizes(N).second) {
5916     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5917         CGF, CGM.getContext().getSizeType(),
5918         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5919     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5920                                 CGM.getContext().getSizeType(), Loc);
5921   }
5922   RCG.emitAggregateType(CGF, N, Size);
5923   Address OrigAddr = Address::invalid();
5924   // If initializer uses initializer from declare reduction construct, emit a
5925   // pointer to the address of the original reduction item (reuired by reduction
5926   // initializer)
5927   if (RCG.usesReductionInitializer(N)) {
5928     Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5929     OrigAddr = CGF.EmitLoadOfPointer(
5930         SharedAddr,
5931         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5932   }
5933   // Emit the initializer:
5934   // %0 = bitcast void* %arg to <type>*
5935   // store <type> <init>, <type>* %0
5936   RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5937                          [](CodeGenFunction &) { return false; });
5938   CGF.FinishFunction();
5939   return Fn;
5940 }
5941 
5942 /// Emits reduction combiner function:
5943 /// \code
5944 /// void @.red_comb(void* %arg0, void* %arg1) {
5945 /// %lhs = bitcast void* %arg0 to <type>*
5946 /// %rhs = bitcast void* %arg1 to <type>*
5947 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5948 /// store <type> %2, <type>* %lhs
5949 /// ret void
5950 /// }
5951 /// \endcode
5952 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5953                                            SourceLocation Loc,
5954                                            ReductionCodeGen &RCG, unsigned N,
5955                                            const Expr *ReductionOp,
5956                                            const Expr *LHS, const Expr *RHS,
5957                                            const Expr *PrivateRef) {
5958   ASTContext &C = CGM.getContext();
5959   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5960   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5961   FunctionArgList Args;
5962   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5963                                C.VoidPtrTy, ImplicitParamDecl::Other);
5964   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5965                             ImplicitParamDecl::Other);
5966   Args.emplace_back(&ParamInOut);
5967   Args.emplace_back(&ParamIn);
5968   const auto &FnInfo =
5969       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5970   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5971   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5972   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5973                                     Name, &CGM.getModule());
5974   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5975   Fn->setDoesNotRecurse();
5976   CodeGenFunction CGF(CGM);
5977   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5978   llvm::Value *Size = nullptr;
5979   // If the size of the reduction item is non-constant, load it from global
5980   // threadprivate variable.
5981   if (RCG.getSizes(N).second) {
5982     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5983         CGF, CGM.getContext().getSizeType(),
5984         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5985     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5986                                 CGM.getContext().getSizeType(), Loc);
5987   }
5988   RCG.emitAggregateType(CGF, N, Size);
5989   // Remap lhs and rhs variables to the addresses of the function arguments.
5990   // %lhs = bitcast void* %arg0 to <type>*
5991   // %rhs = bitcast void* %arg1 to <type>*
5992   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5993   PrivateScope.addPrivate(
5994       LHSVD,
5995       // Pull out the pointer to the variable.
5996       CGF.Builder.CreateElementBitCast(
5997           CGF.EmitLoadOfPointer(
5998               CGF.GetAddrOfLocalVar(&ParamInOut),
5999               C.getPointerType(C.VoidPtrTy).castAs<PointerType>()),
6000           CGF.ConvertTypeForMem(LHSVD->getType())));
6001   PrivateScope.addPrivate(
6002       RHSVD,
6003       // Pull out the pointer to the variable.
6004       CGF.Builder.CreateElementBitCast(
6005           CGF.EmitLoadOfPointer(
6006               CGF.GetAddrOfLocalVar(&ParamIn),
6007               C.getPointerType(C.VoidPtrTy).castAs<PointerType>()),
6008           CGF.ConvertTypeForMem(RHSVD->getType())));
6009   PrivateScope.Privatize();
6010   // Emit the combiner body:
6011   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
6012   // store <type> %2, <type>* %lhs
6013   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
6014       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
6015       cast<DeclRefExpr>(RHS));
6016   CGF.FinishFunction();
6017   return Fn;
6018 }
6019 
6020 /// Emits reduction finalizer function:
6021 /// \code
6022 /// void @.red_fini(void* %arg) {
6023 /// %0 = bitcast void* %arg to <type>*
6024 /// <destroy>(<type>* %0)
6025 /// ret void
6026 /// }
6027 /// \endcode
6028 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
6029                                            SourceLocation Loc,
6030                                            ReductionCodeGen &RCG, unsigned N) {
6031   if (!RCG.needCleanups(N))
6032     return nullptr;
6033   ASTContext &C = CGM.getContext();
6034   FunctionArgList Args;
6035   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6036                           ImplicitParamDecl::Other);
6037   Args.emplace_back(&Param);
6038   const auto &FnInfo =
6039       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6040   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6041   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
6042   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6043                                     Name, &CGM.getModule());
6044   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6045   Fn->setDoesNotRecurse();
6046   CodeGenFunction CGF(CGM);
6047   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6048   Address PrivateAddr = CGF.EmitLoadOfPointer(
6049       CGF.GetAddrOfLocalVar(&Param),
6050       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6051   llvm::Value *Size = nullptr;
6052   // If the size of the reduction item is non-constant, load it from global
6053   // threadprivate variable.
6054   if (RCG.getSizes(N).second) {
6055     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6056         CGF, CGM.getContext().getSizeType(),
6057         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6058     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6059                                 CGM.getContext().getSizeType(), Loc);
6060   }
6061   RCG.emitAggregateType(CGF, N, Size);
6062   // Emit the finalizer body:
6063   // <destroy>(<type>* %0)
6064   RCG.emitCleanups(CGF, N, PrivateAddr);
6065   CGF.FinishFunction(Loc);
6066   return Fn;
6067 }
6068 
/// Emits the setup code for task reductions: fills a local array of
/// kmp_taskred_input_t descriptors, one per entry of \p Data.ReductionVars,
/// and hands it to __kmpc_taskred_modifier_init (when
/// \p Data.IsReductionWithTaskMod is set) or to __kmpc_taskred_init.
/// \param LHSExprs, RHSExprs per-item expressions forwarded to the generated
/// combiner functions.
/// \returns the result of the emitted runtime call, or nullptr when there is
/// no insert point or no reduction variables.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  // The fields below are added in exactly the order listed above; the flags
  // field is created as an unsigned 32-bit integer.
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  // One descriptor per reduction variable.
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_taskred_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // NOTE(review): LHSExprs, RHSExprs, Data.ReductionOps and
  // Data.ReductionCopies are indexed below with the same counter as
  // Data.ReductionVars; presumably callers guarantee equal lengths - confirm.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    // Must precede the getSharedLValue/getOrigLValue queries below.
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    // ElemLVal.reduce_size = size of the item in chars;
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    // A non-null second size component selects delayed creation.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    // emitReduceFiniFunction returns nullptr when the item needs no cleanups;
    // a null pointer is stored in that case.
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6197 
6198 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6199                                             SourceLocation Loc,
6200                                             bool IsWorksharingReduction) {
6201   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6202   // is_ws, int num, void *data);
6203   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6204   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6205                                                 CGM.IntTy, /*isSigned=*/true);
6206   llvm::Value *Args[] = {IdentTLoc, GTid,
6207                          llvm::ConstantInt::get(CGM.IntTy,
6208                                                 IsWorksharingReduction ? 1 : 0,
6209                                                 /*isSigned=*/true)};
6210   (void)CGF.EmitRuntimeCall(
6211       OMPBuilder.getOrCreateRuntimeFunction(
6212           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6213       Args);
6214 }
6215 
6216 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6217                                               SourceLocation Loc,
6218                                               ReductionCodeGen &RCG,
6219                                               unsigned N) {
6220   auto Sizes = RCG.getSizes(N);
6221   // Emit threadprivate global variable if the type is non-constant
6222   // (Sizes.second = nullptr).
6223   if (Sizes.second) {
6224     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6225                                                      /*isSigned=*/false);
6226     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6227         CGF, CGM.getContext().getSizeType(),
6228         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6229     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6230   }
6231 }
6232 
6233 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6234                                               SourceLocation Loc,
6235                                               llvm::Value *ReductionsPtr,
6236                                               LValue SharedLVal) {
6237   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6238   // *d);
6239   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6240                                                    CGM.IntTy,
6241                                                    /*isSigned=*/true),
6242                          ReductionsPtr,
6243                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6244                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6245   return Address::deprecated(
6246       CGF.EmitRuntimeCall(
6247           OMPBuilder.getOrCreateRuntimeFunction(
6248               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6249           Args),
6250       SharedLVal.getAlignment());
6251 }
6252 
6253 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
6254                                        const OMPTaskDataTy &Data) {
6255   if (!CGF.HaveInsertPoint())
6256     return;
6257 
6258   if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
6259     // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
6260     OMPBuilder.createTaskwait(CGF.Builder);
6261   } else {
6262     llvm::Value *ThreadID = getThreadID(CGF, Loc);
6263     llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
6264     auto &M = CGM.getModule();
6265     Address DependenciesArray = Address::invalid();
6266     llvm::Value *NumOfElements;
6267     std::tie(NumOfElements, DependenciesArray) =
6268         emitDependClause(CGF, Data.Dependences, Loc);
6269     llvm::Value *DepWaitTaskArgs[6];
6270     if (!Data.Dependences.empty()) {
6271       DepWaitTaskArgs[0] = UpLoc;
6272       DepWaitTaskArgs[1] = ThreadID;
6273       DepWaitTaskArgs[2] = NumOfElements;
6274       DepWaitTaskArgs[3] = DependenciesArray.getPointer();
6275       DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
6276       DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6277 
6278       CodeGenFunction::RunCleanupsScope LocalScope(CGF);
6279 
6280       // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
6281       // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
6282       // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
6283       // is specified.
6284       CGF.EmitRuntimeCall(
6285           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
6286           DepWaitTaskArgs);
6287 
6288     } else {
6289 
6290       // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6291       // global_tid);
6292       llvm::Value *Args[] = {UpLoc, ThreadID};
6293       // Ignore return result until untied tasks are supported.
6294       CGF.EmitRuntimeCall(
6295           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
6296           Args);
6297     }
6298   }
6299 
6300   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6301     Region->emitUntiedSwitch(CGF);
6302 }
6303 
6304 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6305                                            OpenMPDirectiveKind InnerKind,
6306                                            const RegionCodeGenTy &CodeGen,
6307                                            bool HasCancel) {
6308   if (!CGF.HaveInsertPoint())
6309     return;
6310   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6311                                  InnerKind != OMPD_critical &&
6312                                      InnerKind != OMPD_master &&
6313                                      InnerKind != OMPD_masked);
6314   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6315 }
6316 
namespace {
/// Cancellation kind values handed to the runtime: getCancellationKind() maps
/// a directive kind to one of these, and the result is passed as the
/// cncl_kind argument of __kmpc_cancel and __kmpc_cancellationpoint.
enum RTCancelKind {
  // Not produced by getCancellationKind() for any directive kind.
  CancelNoreq = 0,
  // Used for OMPD_parallel.
  CancelParallel = 1,
  // Used for OMPD_for.
  CancelLoop = 2,
  // Used for OMPD_sections.
  CancelSections = 3,
  // Used for OMPD_taskgroup.
  CancelTaskgroup = 4
};
} // anonymous namespace
6326 
6327 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6328   RTCancelKind CancelKind = CancelNoreq;
6329   if (CancelRegion == OMPD_parallel)
6330     CancelKind = CancelParallel;
6331   else if (CancelRegion == OMPD_for)
6332     CancelKind = CancelLoop;
6333   else if (CancelRegion == OMPD_sections)
6334     CancelKind = CancelSections;
6335   else {
6336     assert(CancelRegion == OMPD_taskgroup);
6337     CancelKind = CancelTaskgroup;
6338   }
6339   return CancelKind;
6340 }
6341 
6342 void CGOpenMPRuntime::emitCancellationPointCall(
6343     CodeGenFunction &CGF, SourceLocation Loc,
6344     OpenMPDirectiveKind CancelRegion) {
6345   if (!CGF.HaveInsertPoint())
6346     return;
6347   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6348   // global_tid, kmp_int32 cncl_kind);
6349   if (auto *OMPRegionInfo =
6350           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6351     // For 'cancellation point taskgroup', the task region info may not have a
6352     // cancel. This may instead happen in another adjacent task.
6353     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6354       llvm::Value *Args[] = {
6355           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6356           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6357       // Ignore return result until untied tasks are supported.
6358       llvm::Value *Result = CGF.EmitRuntimeCall(
6359           OMPBuilder.getOrCreateRuntimeFunction(
6360               CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6361           Args);
6362       // if (__kmpc_cancellationpoint()) {
6363       //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6364       //   exit from construct;
6365       // }
6366       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6367       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6368       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6369       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6370       CGF.EmitBlock(ExitBB);
6371       if (CancelRegion == OMPD_parallel)
6372         emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6373       // exit from construct;
6374       CodeGenFunction::JumpDest CancelDest =
6375           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6376       CGF.EmitBranchThroughCleanup(CancelDest);
6377       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6378     }
6379   }
6380 }
6381 
6382 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6383                                      const Expr *IfCond,
6384                                      OpenMPDirectiveKind CancelRegion) {
6385   if (!CGF.HaveInsertPoint())
6386     return;
6387   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6388   // kmp_int32 cncl_kind);
6389   auto &M = CGM.getModule();
6390   if (auto *OMPRegionInfo =
6391           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6392     auto &&ThenGen = [this, &M, Loc, CancelRegion,
6393                       OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6394       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6395       llvm::Value *Args[] = {
6396           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6397           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6398       // Ignore return result until untied tasks are supported.
6399       llvm::Value *Result = CGF.EmitRuntimeCall(
6400           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6401       // if (__kmpc_cancel()) {
6402       //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6403       //   exit from construct;
6404       // }
6405       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6406       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6407       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6408       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6409       CGF.EmitBlock(ExitBB);
6410       if (CancelRegion == OMPD_parallel)
6411         RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6412       // exit from construct;
6413       CodeGenFunction::JumpDest CancelDest =
6414           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6415       CGF.EmitBranchThroughCleanup(CancelDest);
6416       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6417     };
6418     if (IfCond) {
6419       emitIfClause(CGF, IfCond, ThenGen,
6420                    [](CodeGenFunction &, PrePostActionTy &) {});
6421     } else {
6422       RegionCodeGenTy ThenRCG(ThenGen);
6423       ThenRCG(CGF);
6424     }
6425   }
6426 }
6427 
6428 namespace {
6429 /// Cleanup action for uses_allocators support.
6430 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6431   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6432 
6433 public:
6434   OMPUsesAllocatorsActionTy(
6435       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6436       : Allocators(Allocators) {}
6437   void Enter(CodeGenFunction &CGF) override {
6438     if (!CGF.HaveInsertPoint())
6439       return;
6440     for (const auto &AllocatorData : Allocators) {
6441       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6442           CGF, AllocatorData.first, AllocatorData.second);
6443     }
6444   }
6445   void Exit(CodeGenFunction &CGF) override {
6446     if (!CGF.HaveInsertPoint())
6447       return;
6448     for (const auto &AllocatorData : Allocators) {
6449       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6450                                                         AllocatorData.first);
6451     }
6452   }
6453 };
6454 } // namespace
6455 
6456 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6457     const OMPExecutableDirective &D, StringRef ParentName,
6458     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6459     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6460   assert(!ParentName.empty() && "Invalid target region parent name!");
6461   HasEmittedTargetRegion = true;
6462   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6463   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6464     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6465       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6466       if (!D.AllocatorTraits)
6467         continue;
6468       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6469     }
6470   }
6471   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6472   CodeGen.setAction(UsesAllocatorAction);
6473   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6474                                    IsOffloadEntry, CodeGen);
6475 }
6476 
6477 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6478                                              const Expr *Allocator,
6479                                              const Expr *AllocatorTraits) {
6480   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6481   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6482   // Use default memspace handle.
6483   llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6484   llvm::Value *NumTraits = llvm::ConstantInt::get(
6485       CGF.IntTy, cast<ConstantArrayType>(
6486                      AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6487                      ->getSize()
6488                      .getLimitedValue());
6489   LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6490   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6491       AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
6492   AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6493                                            AllocatorTraitsLVal.getBaseInfo(),
6494                                            AllocatorTraitsLVal.getTBAAInfo());
6495   llvm::Value *Traits =
6496       CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());
6497 
6498   llvm::Value *AllocatorVal =
6499       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6500                               CGM.getModule(), OMPRTL___kmpc_init_allocator),
6501                           {ThreadId, MemSpaceHandle, NumTraits, Traits});
6502   // Store to allocator.
6503   CGF.EmitVarDecl(*cast<VarDecl>(
6504       cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6505   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6506   AllocatorVal =
6507       CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6508                                Allocator->getType(), Allocator->getExprLoc());
6509   CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6510 }
6511 
6512 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6513                                              const Expr *Allocator) {
6514   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6515   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6516   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6517   llvm::Value *AllocatorVal =
6518       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6519   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6520                                           CGF.getContext().VoidPtrTy,
6521                                           Allocator->getExprLoc());
6522   (void)CGF.EmitRuntimeCall(
6523       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6524                                             OMPRTL___kmpc_destroy_allocator),
6525       {ThreadId, AllocatorVal});
6526 }
6527 
/// Outlines the target region of \p D and, when \p IsOffloadEntry is set,
/// registers it as an offload entry.
/// \param ParentName mangled name of the enclosing function; it becomes part
/// of the entry function name.
/// \param OutlinedFn [out] set to the outlined function when one is built.
/// \param OutlinedFnID [out] set to the region ID when the region is an
/// offload entry.
/// \param CodeGen generator for the body of the target region.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  // The outlined function is generated when compiling for the device, or when
  // offloading is not mandatory.
  const bool BuildOutlinedFn = CGM.getLangOpts().OpenMPIsDevice ||
                               !CGM.getLangOpts().OpenMPOffloadMandatory;
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    // The device and file IDs are printed in hexadecimal, the line number in
    // decimal.
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  if (BuildOutlinedFn)
    OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have weak, non-DSO-local linkage in case we are
  // emitting code for the device, because these functions will be entry points
  // to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    // BuildOutlinedFn is always true on this path, so OutlinedFn was assigned
    // above.
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // On the host the ID is a weak constant i8 global initialized to zero.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // If we do not allow host fallback we still need a named address to use.
  // In that case an internal constant i8 global named after the entry
  // function is registered instead of the outlined function.
  llvm::Constant *TargetRegionEntryAddr = OutlinedFn;
  if (!BuildOutlinedFn) {
    assert(!CGM.getModule().getGlobalVariable(EntryFnName, true) &&
           "Named kernel already exists?");
    TargetRegionEntryAddr = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::InternalLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), EntryFnName);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, TargetRegionEntryAddr, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);

  // Add NumTeams and ThreadLimit attributes to the outlined GPU function
  // The attributes are only added for positive values and when OutlinedFn is
  // non-null.
  // NOTE(review): OutlinedFn is only assigned here when BuildOutlinedFn is
  // true; presumably callers pass it in as nullptr otherwise - confirm.
  int32_t DefaultValTeams = -1;
  getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
  if (DefaultValTeams > 0 && OutlinedFn) {
    OutlinedFn->addFnAttr("omp_target_num_teams",
                          std::to_string(DefaultValTeams));
  }
  int32_t DefaultValThreads = -1;
  getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
  if (DefaultValThreads > 0 && OutlinedFn) {
    OutlinedFn->addFnAttr("omp_target_thread_limit",
                          std::to_string(DefaultValThreads));
  }

  // Let the target apply its own attributes to the outlined function; no
  // declaration is passed.
  if (BuildOutlinedFn)
    CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
}
6627 
6628 /// Checks if the expression is constant or does not have non-trivial function
6629 /// calls.
6630 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6631   // We can skip constant expressions.
6632   // We can skip expressions with trivial calls or simple expressions.
6633   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6634           !E->hasNonTrivialCall(Ctx)) &&
6635          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6636 }
6637 
6638 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6639                                                     const Stmt *Body) {
6640   const Stmt *Child = Body->IgnoreContainers();
6641   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6642     Child = nullptr;
6643     for (const Stmt *S : C->body()) {
6644       if (const auto *E = dyn_cast<Expr>(S)) {
6645         if (isTrivial(Ctx, E))
6646           continue;
6647       }
6648       // Some of the statements can be ignored.
6649       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6650           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6651         continue;
6652       // Analyze declarations.
6653       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6654         if (llvm::all_of(DS->decls(), [](const Decl *D) {
6655               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6656                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6657                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6658                   isa<UsingDirectiveDecl>(D) ||
6659                   isa<OMPDeclareReductionDecl>(D) ||
6660                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6661                 return true;
6662               const auto *VD = dyn_cast<VarDecl>(D);
6663               if (!VD)
6664                 return false;
6665               return VD->hasGlobalStorage() || !VD->isUsed();
6666             }))
6667           continue;
6668       }
6669       // Found multiple children - cannot get the one child only.
6670       if (Child)
6671         return nullptr;
6672       Child = S;
6673     }
6674     if (Child)
6675       Child = Child->IgnoreContainers();
6676   }
6677   return Child;
6678 }
6679 
6680 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6681     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6682     int32_t &DefaultVal) {
6683 
6684   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6685   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6686          "Expected target-based executable directive.");
6687   switch (DirectiveKind) {
6688   case OMPD_target: {
6689     const auto *CS = D.getInnermostCapturedStmt();
6690     const auto *Body =
6691         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6692     const Stmt *ChildStmt =
6693         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6694     if (const auto *NestedDir =
6695             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6696       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6697         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6698           const Expr *NumTeams =
6699               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6700           if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6701             if (auto Constant =
6702                     NumTeams->getIntegerConstantExpr(CGF.getContext()))
6703               DefaultVal = Constant->getExtValue();
6704           return NumTeams;
6705         }
6706         DefaultVal = 0;
6707         return nullptr;
6708       }
6709       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6710           isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
6711         DefaultVal = 1;
6712         return nullptr;
6713       }
6714       DefaultVal = 1;
6715       return nullptr;
6716     }
6717     // A value of -1 is used to check if we need to emit no teams region
6718     DefaultVal = -1;
6719     return nullptr;
6720   }
6721   case OMPD_target_teams:
6722   case OMPD_target_teams_distribute:
6723   case OMPD_target_teams_distribute_simd:
6724   case OMPD_target_teams_distribute_parallel_for:
6725   case OMPD_target_teams_distribute_parallel_for_simd: {
6726     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6727       const Expr *NumTeams =
6728           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6729       if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6730         if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6731           DefaultVal = Constant->getExtValue();
6732       return NumTeams;
6733     }
6734     DefaultVal = 0;
6735     return nullptr;
6736   }
6737   case OMPD_target_parallel:
6738   case OMPD_target_parallel_for:
6739   case OMPD_target_parallel_for_simd:
6740   case OMPD_target_simd:
6741     DefaultVal = 1;
6742     return nullptr;
6743   case OMPD_parallel:
6744   case OMPD_for:
6745   case OMPD_parallel_for:
6746   case OMPD_parallel_master:
6747   case OMPD_parallel_sections:
6748   case OMPD_for_simd:
6749   case OMPD_parallel_for_simd:
6750   case OMPD_cancel:
6751   case OMPD_cancellation_point:
6752   case OMPD_ordered:
6753   case OMPD_threadprivate:
6754   case OMPD_allocate:
6755   case OMPD_task:
6756   case OMPD_simd:
6757   case OMPD_tile:
6758   case OMPD_unroll:
6759   case OMPD_sections:
6760   case OMPD_section:
6761   case OMPD_single:
6762   case OMPD_master:
6763   case OMPD_critical:
6764   case OMPD_taskyield:
6765   case OMPD_barrier:
6766   case OMPD_taskwait:
6767   case OMPD_taskgroup:
6768   case OMPD_atomic:
6769   case OMPD_flush:
6770   case OMPD_depobj:
6771   case OMPD_scan:
6772   case OMPD_teams:
6773   case OMPD_target_data:
6774   case OMPD_target_exit_data:
6775   case OMPD_target_enter_data:
6776   case OMPD_distribute:
6777   case OMPD_distribute_simd:
6778   case OMPD_distribute_parallel_for:
6779   case OMPD_distribute_parallel_for_simd:
6780   case OMPD_teams_distribute:
6781   case OMPD_teams_distribute_simd:
6782   case OMPD_teams_distribute_parallel_for:
6783   case OMPD_teams_distribute_parallel_for_simd:
6784   case OMPD_target_update:
6785   case OMPD_declare_simd:
6786   case OMPD_declare_variant:
6787   case OMPD_begin_declare_variant:
6788   case OMPD_end_declare_variant:
6789   case OMPD_declare_target:
6790   case OMPD_end_declare_target:
6791   case OMPD_declare_reduction:
6792   case OMPD_declare_mapper:
6793   case OMPD_taskloop:
6794   case OMPD_taskloop_simd:
6795   case OMPD_master_taskloop:
6796   case OMPD_master_taskloop_simd:
6797   case OMPD_parallel_master_taskloop:
6798   case OMPD_parallel_master_taskloop_simd:
6799   case OMPD_requires:
6800   case OMPD_metadirective:
6801   case OMPD_unknown:
6802     break;
6803   default:
6804     break;
6805   }
6806   llvm_unreachable("Unexpected directive kind.");
6807 }
6808 
6809 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6810     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6811   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6812          "Clauses associated with the teams directive expected to be emitted "
6813          "only for the host!");
6814   CGBuilderTy &Bld = CGF.Builder;
6815   int32_t DefaultNT = -1;
6816   const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
6817   if (NumTeams != nullptr) {
6818     OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6819 
6820     switch (DirectiveKind) {
6821     case OMPD_target: {
6822       const auto *CS = D.getInnermostCapturedStmt();
6823       CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6824       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6825       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6826                                                   /*IgnoreResultAssign*/ true);
6827       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6828                              /*isSigned=*/true);
6829     }
6830     case OMPD_target_teams:
6831     case OMPD_target_teams_distribute:
6832     case OMPD_target_teams_distribute_simd:
6833     case OMPD_target_teams_distribute_parallel_for:
6834     case OMPD_target_teams_distribute_parallel_for_simd: {
6835       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6836       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6837                                                   /*IgnoreResultAssign*/ true);
6838       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6839                              /*isSigned=*/true);
6840     }
6841     default:
6842       break;
6843     }
6844   } else if (DefaultNT == -1) {
6845     return nullptr;
6846   }
6847 
6848   return Bld.getInt32(DefaultNT);
6849 }
6850 
6851 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6852                                   llvm::Value *DefaultThreadLimitVal) {
6853   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6854       CGF.getContext(), CS->getCapturedStmt());
6855   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6856     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6857       llvm::Value *NumThreads = nullptr;
6858       llvm::Value *CondVal = nullptr;
6859       // Handle if clause. If if clause present, the number of threads is
6860       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6861       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6862         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6863         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6864         const OMPIfClause *IfClause = nullptr;
6865         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6866           if (C->getNameModifier() == OMPD_unknown ||
6867               C->getNameModifier() == OMPD_parallel) {
6868             IfClause = C;
6869             break;
6870           }
6871         }
6872         if (IfClause) {
6873           const Expr *Cond = IfClause->getCondition();
6874           bool Result;
6875           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6876             if (!Result)
6877               return CGF.Builder.getInt32(1);
6878           } else {
6879             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6880             if (const auto *PreInit =
6881                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6882               for (const auto *I : PreInit->decls()) {
6883                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6884                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6885                 } else {
6886                   CodeGenFunction::AutoVarEmission Emission =
6887                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6888                   CGF.EmitAutoVarCleanups(Emission);
6889                 }
6890               }
6891             }
6892             CondVal = CGF.EvaluateExprAsBool(Cond);
6893           }
6894         }
6895       }
6896       // Check the value of num_threads clause iff if clause was not specified
6897       // or is not evaluated to false.
6898       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6899         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6900         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6901         const auto *NumThreadsClause =
6902             Dir->getSingleClause<OMPNumThreadsClause>();
6903         CodeGenFunction::LexicalScope Scope(
6904             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6905         if (const auto *PreInit =
6906                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6907           for (const auto *I : PreInit->decls()) {
6908             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6909               CGF.EmitVarDecl(cast<VarDecl>(*I));
6910             } else {
6911               CodeGenFunction::AutoVarEmission Emission =
6912                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6913               CGF.EmitAutoVarCleanups(Emission);
6914             }
6915           }
6916         }
6917         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6918         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6919                                                /*isSigned=*/false);
6920         if (DefaultThreadLimitVal)
6921           NumThreads = CGF.Builder.CreateSelect(
6922               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6923               DefaultThreadLimitVal, NumThreads);
6924       } else {
6925         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6926                                            : CGF.Builder.getInt32(0);
6927       }
6928       // Process condition of the if clause.
6929       if (CondVal) {
6930         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6931                                               CGF.Builder.getInt32(1));
6932       }
6933       return NumThreads;
6934     }
6935     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6936       return CGF.Builder.getInt32(1);
6937     return DefaultThreadLimitVal;
6938   }
6939   return DefaultThreadLimitVal ? DefaultThreadLimitVal
6940                                : CGF.Builder.getInt32(0);
6941 }
6942 
6943 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6944     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6945     int32_t &DefaultVal) {
6946   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6947   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6948          "Expected target-based executable directive.");
6949 
6950   switch (DirectiveKind) {
6951   case OMPD_target:
6952     // Teams have no clause thread_limit
6953     return nullptr;
6954   case OMPD_target_teams:
6955   case OMPD_target_teams_distribute:
6956     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6957       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6958       const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
6959       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6960         if (auto Constant =
6961                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6962           DefaultVal = Constant->getExtValue();
6963       return ThreadLimit;
6964     }
6965     return nullptr;
6966   case OMPD_target_parallel:
6967   case OMPD_target_parallel_for:
6968   case OMPD_target_parallel_for_simd:
6969   case OMPD_target_teams_distribute_parallel_for:
6970   case OMPD_target_teams_distribute_parallel_for_simd: {
6971     Expr *ThreadLimit = nullptr;
6972     Expr *NumThreads = nullptr;
6973     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6974       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6975       ThreadLimit = ThreadLimitClause->getThreadLimit();
6976       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6977         if (auto Constant =
6978                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6979           DefaultVal = Constant->getExtValue();
6980     }
6981     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6982       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6983       NumThreads = NumThreadsClause->getNumThreads();
6984       if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
6985         if (auto Constant =
6986                 NumThreads->getIntegerConstantExpr(CGF.getContext())) {
6987           if (Constant->getExtValue() < DefaultVal) {
6988             DefaultVal = Constant->getExtValue();
6989             ThreadLimit = NumThreads;
6990           }
6991         }
6992       }
6993     }
6994     return ThreadLimit;
6995   }
6996   case OMPD_target_teams_distribute_simd:
6997   case OMPD_target_simd:
6998     DefaultVal = 1;
6999     return nullptr;
7000   case OMPD_parallel:
7001   case OMPD_for:
7002   case OMPD_parallel_for:
7003   case OMPD_parallel_master:
7004   case OMPD_parallel_sections:
7005   case OMPD_for_simd:
7006   case OMPD_parallel_for_simd:
7007   case OMPD_cancel:
7008   case OMPD_cancellation_point:
7009   case OMPD_ordered:
7010   case OMPD_threadprivate:
7011   case OMPD_allocate:
7012   case OMPD_task:
7013   case OMPD_simd:
7014   case OMPD_tile:
7015   case OMPD_unroll:
7016   case OMPD_sections:
7017   case OMPD_section:
7018   case OMPD_single:
7019   case OMPD_master:
7020   case OMPD_critical:
7021   case OMPD_taskyield:
7022   case OMPD_barrier:
7023   case OMPD_taskwait:
7024   case OMPD_taskgroup:
7025   case OMPD_atomic:
7026   case OMPD_flush:
7027   case OMPD_depobj:
7028   case OMPD_scan:
7029   case OMPD_teams:
7030   case OMPD_target_data:
7031   case OMPD_target_exit_data:
7032   case OMPD_target_enter_data:
7033   case OMPD_distribute:
7034   case OMPD_distribute_simd:
7035   case OMPD_distribute_parallel_for:
7036   case OMPD_distribute_parallel_for_simd:
7037   case OMPD_teams_distribute:
7038   case OMPD_teams_distribute_simd:
7039   case OMPD_teams_distribute_parallel_for:
7040   case OMPD_teams_distribute_parallel_for_simd:
7041   case OMPD_target_update:
7042   case OMPD_declare_simd:
7043   case OMPD_declare_variant:
7044   case OMPD_begin_declare_variant:
7045   case OMPD_end_declare_variant:
7046   case OMPD_declare_target:
7047   case OMPD_end_declare_target:
7048   case OMPD_declare_reduction:
7049   case OMPD_declare_mapper:
7050   case OMPD_taskloop:
7051   case OMPD_taskloop_simd:
7052   case OMPD_master_taskloop:
7053   case OMPD_master_taskloop_simd:
7054   case OMPD_parallel_master_taskloop:
7055   case OMPD_parallel_master_taskloop_simd:
7056   case OMPD_requires:
7057   case OMPD_unknown:
7058     break;
7059   default:
7060     break;
7061   }
7062   llvm_unreachable("Unsupported directive kind.");
7063 }
7064 
7065 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
7066     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
7067   assert(!CGF.getLangOpts().OpenMPIsDevice &&
7068          "Clauses associated with the teams directive expected to be emitted "
7069          "only for the host!");
7070   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
7071   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
7072          "Expected target-based executable directive.");
7073   CGBuilderTy &Bld = CGF.Builder;
7074   llvm::Value *ThreadLimitVal = nullptr;
7075   llvm::Value *NumThreadsVal = nullptr;
7076   switch (DirectiveKind) {
7077   case OMPD_target: {
7078     const CapturedStmt *CS = D.getInnermostCapturedStmt();
7079     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7080       return NumThreads;
7081     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7082         CGF.getContext(), CS->getCapturedStmt());
7083     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
7084       if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
7085         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
7086         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
7087         const auto *ThreadLimitClause =
7088             Dir->getSingleClause<OMPThreadLimitClause>();
7089         CodeGenFunction::LexicalScope Scope(
7090             CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
7091         if (const auto *PreInit =
7092                 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
7093           for (const auto *I : PreInit->decls()) {
7094             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
7095               CGF.EmitVarDecl(cast<VarDecl>(*I));
7096             } else {
7097               CodeGenFunction::AutoVarEmission Emission =
7098                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
7099               CGF.EmitAutoVarCleanups(Emission);
7100             }
7101           }
7102         }
7103         llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7104             ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7105         ThreadLimitVal =
7106             Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7107       }
7108       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
7109           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
7110         CS = Dir->getInnermostCapturedStmt();
7111         const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7112             CGF.getContext(), CS->getCapturedStmt());
7113         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
7114       }
7115       if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
7116           !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
7117         CS = Dir->getInnermostCapturedStmt();
7118         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7119           return NumThreads;
7120       }
7121       if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
7122         return Bld.getInt32(1);
7123     }
7124     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
7125   }
7126   case OMPD_target_teams: {
7127     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7128       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7129       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7130       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7131           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7132       ThreadLimitVal =
7133           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7134     }
7135     const CapturedStmt *CS = D.getInnermostCapturedStmt();
7136     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7137       return NumThreads;
7138     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7139         CGF.getContext(), CS->getCapturedStmt());
7140     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
7141       if (Dir->getDirectiveKind() == OMPD_distribute) {
7142         CS = Dir->getInnermostCapturedStmt();
7143         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7144           return NumThreads;
7145       }
7146     }
7147     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
7148   }
7149   case OMPD_target_teams_distribute:
7150     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7151       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7152       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7153       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7154           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7155       ThreadLimitVal =
7156           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7157     }
7158     return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
7159   case OMPD_target_parallel:
7160   case OMPD_target_parallel_for:
7161   case OMPD_target_parallel_for_simd:
7162   case OMPD_target_teams_distribute_parallel_for:
7163   case OMPD_target_teams_distribute_parallel_for_simd: {
7164     llvm::Value *CondVal = nullptr;
7165     // Handle if clause. If if clause present, the number of threads is
7166     // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
7167     if (D.hasClausesOfKind<OMPIfClause>()) {
7168       const OMPIfClause *IfClause = nullptr;
7169       for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
7170         if (C->getNameModifier() == OMPD_unknown ||
7171             C->getNameModifier() == OMPD_parallel) {
7172           IfClause = C;
7173           break;
7174         }
7175       }
7176       if (IfClause) {
7177         const Expr *Cond = IfClause->getCondition();
7178         bool Result;
7179         if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
7180           if (!Result)
7181             return Bld.getInt32(1);
7182         } else {
7183           CodeGenFunction::RunCleanupsScope Scope(CGF);
7184           CondVal = CGF.EvaluateExprAsBool(Cond);
7185         }
7186       }
7187     }
7188     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7189       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7190       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7191       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7192           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7193       ThreadLimitVal =
7194           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7195     }
7196     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
7197       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
7198       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
7199       llvm::Value *NumThreads = CGF.EmitScalarExpr(
7200           NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
7201       NumThreadsVal =
7202           Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
7203       ThreadLimitVal = ThreadLimitVal
7204                            ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
7205                                                                 ThreadLimitVal),
7206                                               NumThreadsVal, ThreadLimitVal)
7207                            : NumThreadsVal;
7208     }
7209     if (!ThreadLimitVal)
7210       ThreadLimitVal = Bld.getInt32(0);
7211     if (CondVal)
7212       return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
7213     return ThreadLimitVal;
7214   }
7215   case OMPD_target_teams_distribute_simd:
7216   case OMPD_target_simd:
7217     return Bld.getInt32(1);
7218   case OMPD_parallel:
7219   case OMPD_for:
7220   case OMPD_parallel_for:
7221   case OMPD_parallel_master:
7222   case OMPD_parallel_sections:
7223   case OMPD_for_simd:
7224   case OMPD_parallel_for_simd:
7225   case OMPD_cancel:
7226   case OMPD_cancellation_point:
7227   case OMPD_ordered:
7228   case OMPD_threadprivate:
7229   case OMPD_allocate:
7230   case OMPD_task:
7231   case OMPD_simd:
7232   case OMPD_tile:
7233   case OMPD_unroll:
7234   case OMPD_sections:
7235   case OMPD_section:
7236   case OMPD_single:
7237   case OMPD_master:
7238   case OMPD_critical:
7239   case OMPD_taskyield:
7240   case OMPD_barrier:
7241   case OMPD_taskwait:
7242   case OMPD_taskgroup:
7243   case OMPD_atomic:
7244   case OMPD_flush:
7245   case OMPD_depobj:
7246   case OMPD_scan:
7247   case OMPD_teams:
7248   case OMPD_target_data:
7249   case OMPD_target_exit_data:
7250   case OMPD_target_enter_data:
7251   case OMPD_distribute:
7252   case OMPD_distribute_simd:
7253   case OMPD_distribute_parallel_for:
7254   case OMPD_distribute_parallel_for_simd:
7255   case OMPD_teams_distribute:
7256   case OMPD_teams_distribute_simd:
7257   case OMPD_teams_distribute_parallel_for:
7258   case OMPD_teams_distribute_parallel_for_simd:
7259   case OMPD_target_update:
7260   case OMPD_declare_simd:
7261   case OMPD_declare_variant:
7262   case OMPD_begin_declare_variant:
7263   case OMPD_end_declare_variant:
7264   case OMPD_declare_target:
7265   case OMPD_end_declare_target:
7266   case OMPD_declare_reduction:
7267   case OMPD_declare_mapper:
7268   case OMPD_taskloop:
7269   case OMPD_taskloop_simd:
7270   case OMPD_master_taskloop:
7271   case OMPD_master_taskloop_simd:
7272   case OMPD_parallel_master_taskloop:
7273   case OMPD_parallel_master_taskloop_simd:
7274   case OMPD_requires:
7275   case OMPD_metadirective:
7276   case OMPD_unknown:
7277     break;
7278   default:
7279     break;
7280   }
7281   llvm_unreachable("Unsupported directive kind.");
7282 }
7283 
7284 namespace {
7285 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7286 
7287 // Utility to handle information from clauses associated with a given
7288 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7289 // It provides a convenient interface to obtain the information and generate
7290 // code for that information.
7291 class MappableExprsHandler {
7292 public:
7293   /// Values for bit flags used to specify the mapping type for
7294   /// offloading.
7295   enum OpenMPOffloadMappingFlags : uint64_t {
7296     /// No flags
7297     OMP_MAP_NONE = 0x0,
7298     /// Allocate memory on the device and move data from host to device.
7299     OMP_MAP_TO = 0x01,
7300     /// Allocate memory on the device and move data from device to host.
7301     OMP_MAP_FROM = 0x02,
7302     /// Always perform the requested mapping action on the element, even
7303     /// if it was already mapped before.
7304     OMP_MAP_ALWAYS = 0x04,
7305     /// Delete the element from the device environment, ignoring the
7306     /// current reference count associated with the element.
7307     OMP_MAP_DELETE = 0x08,
7308     /// The element being mapped is a pointer-pointee pair; both the
7309     /// pointer and the pointee should be mapped.
7310     OMP_MAP_PTR_AND_OBJ = 0x10,
7311     /// This flags signals that the base address of an entry should be
7312     /// passed to the target kernel as an argument.
7313     OMP_MAP_TARGET_PARAM = 0x20,
7314     /// Signal that the runtime library has to return the device pointer
7315     /// in the current position for the data being mapped. Used when we have the
7316     /// use_device_ptr or use_device_addr clause.
7317     OMP_MAP_RETURN_PARAM = 0x40,
7318     /// This flag signals that the reference being passed is a pointer to
7319     /// private data.
7320     OMP_MAP_PRIVATE = 0x80,
7321     /// Pass the element to the device by value.
7322     OMP_MAP_LITERAL = 0x100,
7323     /// Implicit map
7324     OMP_MAP_IMPLICIT = 0x200,
7325     /// Close is a hint to the runtime to allocate memory close to
7326     /// the target device.
7327     OMP_MAP_CLOSE = 0x400,
7328     /// 0x800 is reserved for compatibility with XLC.
7329     /// Produce a runtime error if the data is not already allocated.
7330     OMP_MAP_PRESENT = 0x1000,
7331     // Increment and decrement a separate reference counter so that the data
7332     // cannot be unmapped within the associated region.  Thus, this flag is
7333     // intended to be used on 'target' and 'target data' directives because they
7334     // are inherently structured.  It is not intended to be used on 'target
7335     // enter data' and 'target exit data' directives because they are inherently
7336     // dynamic.
7337     // This is an OpenMP extension for the sake of OpenACC support.
7338     OMP_MAP_OMPX_HOLD = 0x2000,
7339     /// Signal that the runtime library should use args as an array of
7340     /// descriptor_dim pointers and use args_size as dims. Used when we have
7341     /// non-contiguous list items in target update directive
7342     OMP_MAP_NON_CONTIG = 0x100000000000,
7343     /// The 16 MSBs of the flags indicate whether the entry is member of some
7344     /// struct/class.
7345     OMP_MAP_MEMBER_OF = 0xffff000000000000,
7346     LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
7347   };
7348 
7349   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7350   static unsigned getFlagMemberOffset() {
7351     unsigned Offset = 0;
7352     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7353          Remain = Remain >> 1)
7354       Offset++;
7355     return Offset;
7356   }
7357 
7358   /// Class that holds debugging information for a data mapping to be passed to
7359   /// the runtime library.
7360   class MappingExprInfo {
7361     /// The variable declaration used for the data mapping.
7362     const ValueDecl *MapDecl = nullptr;
7363     /// The original expression used in the map clause, or null if there is
7364     /// none.
7365     const Expr *MapExpr = nullptr;
7366 
7367   public:
7368     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7369         : MapDecl(MapDecl), MapExpr(MapExpr) {}
7370 
7371     const ValueDecl *getMapDecl() const { return MapDecl; }
7372     const Expr *getMapExpr() const { return MapExpr; }
7373   };
7374 
7375   /// Class that associates information with a base pointer to be passed to the
7376   /// runtime library.
7377   class BasePointerInfo {
7378     /// The base pointer.
7379     llvm::Value *Ptr = nullptr;
7380     /// The base declaration that refers to this device pointer, or null if
7381     /// there is none.
7382     const ValueDecl *DevPtrDecl = nullptr;
7383 
7384   public:
7385     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7386         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7387     llvm::Value *operator*() const { return Ptr; }
7388     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7389     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7390   };
7391 
  // Array types used to accumulate mapping information (see the members of
  // MapCombinedInfoTy for how each one is used).

  /// Array of debug information records (declaration + expression).
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  /// Array of base pointers with their optional device pointer declaration.
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  /// Array of plain llvm::Value pointers.
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  /// Array of mapping flag sets.
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  /// Array of mapper declarations.
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  /// Array of 64-bit dimension values.
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  /// Array of value arrays, used for non-contiguous mapping information.
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7399 
7400   /// This structure contains combined information generated for mappable
7401   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7402   /// mappers, and non-contiguous information.
7403   struct MapCombinedInfoTy {
7404     struct StructNonContiguousInfo {
7405       bool IsNonContiguous = false;
7406       MapDimArrayTy Dims;
7407       MapNonContiguousArrayTy Offsets;
7408       MapNonContiguousArrayTy Counts;
7409       MapNonContiguousArrayTy Strides;
7410     };
7411     MapExprsArrayTy Exprs;
7412     MapBaseValuesArrayTy BasePointers;
7413     MapValuesArrayTy Pointers;
7414     MapValuesArrayTy Sizes;
7415     MapFlagsArrayTy Types;
7416     MapMappersArrayTy Mappers;
7417     StructNonContiguousInfo NonContigInfo;
7418 
7419     /// Append arrays in \a CurInfo.
7420     void append(MapCombinedInfoTy &CurInfo) {
7421       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7422       BasePointers.append(CurInfo.BasePointers.begin(),
7423                           CurInfo.BasePointers.end());
7424       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7425       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7426       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7427       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7428       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7429                                  CurInfo.NonContigInfo.Dims.end());
7430       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7431                                     CurInfo.NonContigInfo.Offsets.end());
7432       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7433                                    CurInfo.NonContigInfo.Counts.end());
7434       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7435                                     CurInfo.NonContigInfo.Strides.end());
7436     }
7437   };
7438 
7439   /// Map between a struct and the its lowest & highest elements which have been
7440   /// mapped.
7441   /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
7442   ///                    HE(FieldIndex, Pointer)}
7443   struct StructRangeInfoTy {
7444     MapCombinedInfoTy PreliminaryMapData;
7445     std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
7446         0, Address::invalid()};
7447     std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
7448         0, Address::invalid()};
7449     Address Base = Address::invalid();
7450     Address LB = Address::invalid();
7451     bool IsArraySection = false;
7452     bool HasCompleteRecord = false;
7453   };
7454 
7455 private:
7456   /// Kind that defines how a device pointer has to be returned.
7457   struct MapInfo {
7458     OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
7459     OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
7460     ArrayRef<OpenMPMapModifierKind> MapModifiers;
7461     ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
7462     bool ReturnDevicePointer = false;
7463     bool IsImplicit = false;
7464     const ValueDecl *Mapper = nullptr;
7465     const Expr *VarRef = nullptr;
7466     bool ForDeviceAddr = false;
7467 
7468     MapInfo() = default;
7469     MapInfo(
7470         OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7471         OpenMPMapClauseKind MapType,
7472         ArrayRef<OpenMPMapModifierKind> MapModifiers,
7473         ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7474         bool ReturnDevicePointer, bool IsImplicit,
7475         const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
7476         bool ForDeviceAddr = false)
7477         : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
7478           MotionModifiers(MotionModifiers),
7479           ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
7480           Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
7481   };
7482 
7483   /// If use_device_ptr or use_device_addr is used on a decl which is a struct
7484   /// member and there is no map information about it, then emission of that
7485   /// entry is deferred until the whole struct has been processed.
7486   struct DeferredDevicePtrEntryTy {
7487     const Expr *IE = nullptr;
7488     const ValueDecl *VD = nullptr;
7489     bool ForDeviceAddr = false;
7490 
7491     DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
7492                              bool ForDeviceAddr)
7493         : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
7494   };
7495 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// The mapped bool is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// Each declaration maps to a list of component lists. The key value for
  /// declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between lambda declarations and their map type, stored as a pointer
  /// to the associated OMPMapClause.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7519 
7520   llvm::Value *getExprTypeSize(const Expr *E) const {
7521     QualType ExprTy = E->getType().getCanonicalType();
7522 
7523     // Calculate the size for array shaping expression.
7524     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7525       llvm::Value *Size =
7526           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7527       for (const Expr *SE : OAE->getDimensions()) {
7528         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7529         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7530                                       CGF.getContext().getSizeType(),
7531                                       SE->getExprLoc());
7532         Size = CGF.Builder.CreateNUWMul(Size, Sz);
7533       }
7534       return Size;
7535     }
7536 
7537     // Reference types are ignored for mapping purposes.
7538     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7539       ExprTy = RefTy->getPointeeType().getCanonicalType();
7540 
7541     // Given that an array section is considered a built-in type, we need to
7542     // do the calculation based on the length of the section instead of relying
7543     // on CGF.getTypeSize(E->getType()).
7544     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7545       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7546                             OAE->getBase()->IgnoreParenImpCasts())
7547                             .getCanonicalType();
7548 
7549       // If there is no length associated with the expression and lower bound is
7550       // not specified too, that means we are using the whole length of the
7551       // base.
7552       if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7553           !OAE->getLowerBound())
7554         return CGF.getTypeSize(BaseTy);
7555 
7556       llvm::Value *ElemSize;
7557       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7558         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7559       } else {
7560         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7561         assert(ATy && "Expecting array type if not a pointer type.");
7562         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7563       }
7564 
7565       // If we don't have a length at this point, that is because we have an
7566       // array section with a single element.
7567       if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7568         return ElemSize;
7569 
7570       if (const Expr *LenExpr = OAE->getLength()) {
7571         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7572         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7573                                              CGF.getContext().getSizeType(),
7574                                              LenExpr->getExprLoc());
7575         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7576       }
7577       assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7578              OAE->getLowerBound() && "expected array_section[lb:].");
7579       // Size = sizetype - lb * elemtype;
7580       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7581       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7582       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7583                                        CGF.getContext().getSizeType(),
7584                                        OAE->getLowerBound()->getExprLoc());
7585       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7586       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7587       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7588       LengthVal = CGF.Builder.CreateSelect(
7589           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7590       return LengthVal;
7591     }
7592     return CGF.getTypeSize(ExprTy);
7593   }
7594 
7595   /// Return the corresponding bits for a given map clause modifier. Add
7596   /// a flag marking the map as a pointer if requested. Add a flag marking the
7597   /// map as the first one of a series of maps that relate to the same map
7598   /// expression.
7599   OpenMPOffloadMappingFlags getMapTypeBits(
7600       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7601       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7602       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7603     OpenMPOffloadMappingFlags Bits =
7604         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7605     switch (MapType) {
7606     case OMPC_MAP_alloc:
7607     case OMPC_MAP_release:
7608       // alloc and release is the default behavior in the runtime library,  i.e.
7609       // if we don't pass any bits alloc/release that is what the runtime is
7610       // going to do. Therefore, we don't need to signal anything for these two
7611       // type modifiers.
7612       break;
7613     case OMPC_MAP_to:
7614       Bits |= OMP_MAP_TO;
7615       break;
7616     case OMPC_MAP_from:
7617       Bits |= OMP_MAP_FROM;
7618       break;
7619     case OMPC_MAP_tofrom:
7620       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7621       break;
7622     case OMPC_MAP_delete:
7623       Bits |= OMP_MAP_DELETE;
7624       break;
7625     case OMPC_MAP_unknown:
7626       llvm_unreachable("Unexpected map type!");
7627     }
7628     if (AddPtrFlag)
7629       Bits |= OMP_MAP_PTR_AND_OBJ;
7630     if (AddIsTargetParamFlag)
7631       Bits |= OMP_MAP_TARGET_PARAM;
7632     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7633       Bits |= OMP_MAP_ALWAYS;
7634     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7635       Bits |= OMP_MAP_CLOSE;
7636     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7637         llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7638       Bits |= OMP_MAP_PRESENT;
7639     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7640       Bits |= OMP_MAP_OMPX_HOLD;
7641     if (IsNonContiguous)
7642       Bits |= OMP_MAP_NON_CONTIG;
7643     return Bits;
7644   }
7645 
7646   /// Return true if the provided expression is a final array section. A
7647   /// final array section, is one whose length can't be proved to be one.
7648   bool isFinalArraySectionExpression(const Expr *E) const {
7649     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7650 
7651     // It is not an array section and therefore not a unity-size one.
7652     if (!OASE)
7653       return false;
7654 
7655     // An array section with no colon always refer to a single element.
7656     if (OASE->getColonLocFirst().isInvalid())
7657       return false;
7658 
7659     const Expr *Length = OASE->getLength();
7660 
7661     // If we don't have a length we have to check if the array has size 1
7662     // for this dimension. Also, we should always expect a length if the
7663     // base type is pointer.
7664     if (!Length) {
7665       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7666                              OASE->getBase()->IgnoreParenImpCasts())
7667                              .getCanonicalType();
7668       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7669         return ATy->getSize().getSExtValue() != 1;
7670       // If we don't have a constant dimension length, we have to consider
7671       // the current section as having any size, so it is not necessarily
7672       // unitary. If it happen to be unity size, that's user fault.
7673       return true;
7674     }
7675 
7676     // Check if the length evaluates to 1.
7677     Expr::EvalResult Result;
7678     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7679       return true; // Can have more that size 1.
7680 
7681     llvm::APSInt ConstLength = Result.Val.getInt();
7682     return ConstLength.getSExtValue() != 1;
7683   }
7684 
7685   /// Generate the base pointers, section pointers, sizes, map type bits, and
7686   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7687   /// map type, map or motion modifiers, and expression components.
7688   /// \a IsFirstComponent should be set to true if the provided set of
7689   /// components is the first associated with a capture.
7690   void generateInfoForComponentList(
7691       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7692       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7693       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7694       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7695       bool IsFirstComponentList, bool IsImplicit,
7696       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7697       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7698       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7699           OverlappedElements = llvm::None) const {
7700     // The following summarizes what has to be generated for each map and the
7701     // types below. The generated information is expressed in this order:
7702     // base pointer, section pointer, size, flags
7703     // (to add to the ones that come from the map type and modifier).
7704     //
7705     // double d;
7706     // int i[100];
7707     // float *p;
7708     //
7709     // struct S1 {
7710     //   int i;
7711     //   float f[50];
7712     // }
7713     // struct S2 {
7714     //   int i;
7715     //   float f[50];
7716     //   S1 s;
7717     //   double *p;
7718     //   struct S2 *ps;
7719     //   int &ref;
7720     // }
7721     // S2 s;
7722     // S2 *ps;
7723     //
7724     // map(d)
7725     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7726     //
7727     // map(i)
7728     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7729     //
7730     // map(i[1:23])
7731     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7732     //
7733     // map(p)
7734     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7735     //
7736     // map(p[1:24])
7737     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7738     // in unified shared memory mode or for local pointers
7739     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7740     //
7741     // map(s)
7742     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7743     //
7744     // map(s.i)
7745     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7746     //
7747     // map(s.s.f)
7748     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7749     //
7750     // map(s.p)
7751     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7752     //
7753     // map(to: s.p[:22])
7754     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7755     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7756     // &(s.p), &(s.p[0]), 22*sizeof(double),
7757     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7758     // (*) alloc space for struct members, only this is a target parameter
7759     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7760     //      optimizes this entry out, same in the examples below)
7761     // (***) map the pointee (map: to)
7762     //
7763     // map(to: s.ref)
7764     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7765     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7766     // (*) alloc space for struct members, only this is a target parameter
7767     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7768     //      optimizes this entry out, same in the examples below)
7769     // (***) map the pointee (map: to)
7770     //
7771     // map(s.ps)
7772     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7773     //
7774     // map(from: s.ps->s.i)
7775     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7776     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7777     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7778     //
7779     // map(to: s.ps->ps)
7780     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7781     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7782     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7783     //
7784     // map(s.ps->ps->ps)
7785     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7786     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7787     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7788     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7789     //
7790     // map(to: s.ps->ps->s.f[:22])
7791     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7792     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7793     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7794     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7795     //
7796     // map(ps)
7797     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7798     //
7799     // map(ps->i)
7800     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7801     //
7802     // map(ps->s.f)
7803     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7804     //
7805     // map(from: ps->p)
7806     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7807     //
7808     // map(to: ps->p[:22])
7809     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7810     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7811     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7812     //
7813     // map(ps->ps)
7814     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7815     //
7816     // map(from: ps->ps->s.i)
7817     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7818     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7819     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7820     //
7821     // map(from: ps->ps->ps)
7822     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7823     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7824     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7825     //
7826     // map(ps->ps->ps->ps)
7827     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7828     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7829     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7830     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7831     //
7832     // map(to: ps->ps->ps->s.f[:22])
7833     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7834     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7835     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7836     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7837     //
7838     // map(to: s.f[:22]) map(from: s.p[:33])
7839     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
7841     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7842     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7843     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7844     // (*) allocate contiguous space needed to fit all mapped members even if
7845     //     we allocate space for members not mapped (in this example,
7846     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7847     //     them as well because they fall between &s.f[0] and &s.p)
7848     //
7849     // map(from: s.f[:22]) map(to: ps->p[:33])
7850     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7851     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7852     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7853     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7854     // (*) the struct this entry pertains to is the 2nd element in the list of
7855     //     arguments, hence MEMBER_OF(2)
7856     //
7857     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7858     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7859     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7860     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7861     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7862     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7863     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7864     // (*) the struct this entry pertains to is the 4th element in the list
7865     //     of arguments, hence MEMBER_OF(4)
7866 
7867     // Track if the map information being generated is the first for a capture.
7868     bool IsCaptureFirstInfo = IsFirstComponentList;
7869     // When the variable is on a declare target link or in a to clause with
7870     // unified memory, a reference is needed to hold the host/device address
7871     // of the variable.
7872     bool RequiresReference = false;
7873 
7874     // Scan the components from the base to the complete expression.
7875     auto CI = Components.rbegin();
7876     auto CE = Components.rend();
7877     auto I = CI;
7878 
7879     // Track if the map information being generated is the first for a list of
7880     // components.
7881     bool IsExpressionFirstInfo = true;
7882     bool FirstPointerInComplexData = false;
7883     Address BP = Address::invalid();
7884     const Expr *AssocExpr = I->getAssociatedExpression();
7885     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7886     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7887     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7888 
7889     if (isa<MemberExpr>(AssocExpr)) {
7890       // The base is the 'this' pointer. The content of the pointer is going
7891       // to be the base of the field being mapped.
7892       BP = CGF.LoadCXXThisAddress();
7893     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7894                (OASE &&
7895                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7896       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7897     } else if (OAShE &&
7898                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7899       BP = Address::deprecated(
7900           CGF.EmitScalarExpr(OAShE->getBase()),
7901           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7902     } else {
7903       // The base is the reference to the variable.
7904       // BP = &Var.
7905       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7906       if (const auto *VD =
7907               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7908         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7909                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7910           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7911               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7912                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7913             RequiresReference = true;
7914             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7915           }
7916         }
7917       }
7918 
7919       // If the variable is a pointer and is being dereferenced (i.e. is not
7920       // the last component), the base has to be the pointer itself, not its
7921       // reference. References are ignored for mapping purposes.
7922       QualType Ty =
7923           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7924       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7925         // No need to generate individual map information for the pointer, it
7926         // can be associated with the combined storage if shared memory mode is
7927         // active or the base declaration is not global variable.
7928         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7929         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7930             !VD || VD->hasLocalStorage())
7931           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7932         else
7933           FirstPointerInComplexData = true;
7934         ++I;
7935       }
7936     }
7937 
7938     // Track whether a component of the list should be marked as MEMBER_OF some
7939     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7940     // in a component list should be marked as MEMBER_OF, all subsequent entries
7941     // do not belong to the base struct. E.g.
7942     // struct S2 s;
7943     // s.ps->ps->ps->f[:]
7944     //   (1) (2) (3) (4)
7945     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7946     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7947     // is the pointee of ps(2) which is not member of struct s, so it should not
7948     // be marked as such (it is still PTR_AND_OBJ).
7949     // The variable is initialized to false so that PTR_AND_OBJ entries which
7950     // are not struct members are not considered (e.g. array of pointers to
7951     // data).
7952     bool ShouldBeMemberOf = false;
7953 
7954     // Variable keeping track of whether or not we have encountered a component
7955     // in the component list which is a member expression. Useful when we have a
7956     // pointer or a final array section, in which case it is the previous
7957     // component in the list which tells us whether we have a member expression.
7958     // E.g. X.f[:]
7959     // While processing the final array section "[:]" it is "f" which tells us
7960     // whether we are dealing with a member of a declared struct.
7961     const MemberExpr *EncounteredME = nullptr;
7962 
7963     // Track for the total number of dimension. Start from one for the dummy
7964     // dimension.
7965     uint64_t DimSize = 1;
7966 
7967     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7968     bool IsPrevMemberReference = false;
7969 
7970     for (; I != CE; ++I) {
7971       // If the current component is member of a struct (parent struct) mark it.
7972       if (!EncounteredME) {
7973         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7974         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7975         // as MEMBER_OF the parent struct.
7976         if (EncounteredME) {
7977           ShouldBeMemberOf = true;
7978           // Do not emit as complex pointer if this is actually not array-like
7979           // expression.
7980           if (FirstPointerInComplexData) {
7981             QualType Ty = std::prev(I)
7982                               ->getAssociatedDeclaration()
7983                               ->getType()
7984                               .getNonReferenceType();
7985             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7986             FirstPointerInComplexData = false;
7987           }
7988         }
7989       }
7990 
7991       auto Next = std::next(I);
7992 
7993       // We need to generate the addresses and sizes if this is the last
7994       // component, if the component is a pointer or if it is an array section
7995       // whose length can't be proved to be one. If this is a pointer, it
7996       // becomes the base address for the following components.
7997 
7998       // A final array section, is one whose length can't be proved to be one.
7999       // If the map item is non-contiguous then we don't treat any array section
8000       // as final array section.
8001       bool IsFinalArraySection =
8002           !IsNonContiguous &&
8003           isFinalArraySectionExpression(I->getAssociatedExpression());
8004 
8005       // If we have a declaration for the mapping use that, otherwise use
8006       // the base declaration of the map clause.
8007       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
8008                                      ? I->getAssociatedDeclaration()
8009                                      : BaseDecl;
8010       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
8011                                                : MapExpr;
8012 
8013       // Get information on whether the element is a pointer. Have to do a
8014       // special treatment for array sections given that they are built-in
8015       // types.
8016       const auto *OASE =
8017           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
8018       const auto *OAShE =
8019           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
8020       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
8021       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
8022       bool IsPointer =
8023           OAShE ||
8024           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
8025                        .getCanonicalType()
8026                        ->isAnyPointerType()) ||
8027           I->getAssociatedExpression()->getType()->isAnyPointerType();
8028       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
8029                                MapDecl &&
8030                                MapDecl->getType()->isLValueReferenceType();
8031       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
8032 
8033       if (OASE)
8034         ++DimSize;
8035 
8036       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
8037           IsFinalArraySection) {
8038         // If this is not the last component, we expect the pointer to be
8039         // associated with an array expression or member expression.
8040         assert((Next == CE ||
8041                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
8042                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
8043                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
8044                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
8045                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
8046                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
8047                "Unexpected expression");
8048 
8049         Address LB = Address::invalid();
8050         Address LowestElem = Address::invalid();
8051         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
8052                                        const MemberExpr *E) {
8053           const Expr *BaseExpr = E->getBase();
8054           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
8055           // scalar.
8056           LValue BaseLV;
8057           if (E->isArrow()) {
8058             LValueBaseInfo BaseInfo;
8059             TBAAAccessInfo TBAAInfo;
8060             Address Addr =
8061                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
8062             QualType PtrTy = BaseExpr->getType()->getPointeeType();
8063             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
8064           } else {
8065             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
8066           }
8067           return BaseLV;
8068         };
8069         if (OAShE) {
8070           LowestElem = LB =
8071               Address::deprecated(CGF.EmitScalarExpr(OAShE->getBase()),
8072                                   CGF.getContext().getTypeAlignInChars(
8073                                       OAShE->getBase()->getType()));
8074         } else if (IsMemberReference) {
8075           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
8076           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8077           LowestElem = CGF.EmitLValueForFieldInitialization(
8078                               BaseLVal, cast<FieldDecl>(MapDecl))
8079                            .getAddress(CGF);
8080           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
8081                    .getAddress(CGF);
8082         } else {
8083           LowestElem = LB =
8084               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
8085                   .getAddress(CGF);
8086         }
8087 
8088         // If this component is a pointer inside the base struct then we don't
8089         // need to create any entry for it - it will be combined with the object
8090         // it is pointing to into a single PTR_AND_OBJ entry.
8091         bool IsMemberPointerOrAddr =
8092             EncounteredME &&
8093             (((IsPointer || ForDeviceAddr) &&
8094               I->getAssociatedExpression() == EncounteredME) ||
8095              (IsPrevMemberReference && !IsPointer) ||
8096              (IsMemberReference && Next != CE &&
8097               !Next->getAssociatedExpression()->getType()->isPointerType()));
8098         if (!OverlappedElements.empty() && Next == CE) {
8099           // Handle base element with the info for overlapped elements.
8100           assert(!PartialStruct.Base.isValid() && "The base element is set.");
8101           assert(!IsPointer &&
8102                  "Unexpected base element with the pointer type.");
8103           // Mark the whole struct as the struct that requires allocation on the
8104           // device.
8105           PartialStruct.LowestElem = {0, LowestElem};
8106           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8107               I->getAssociatedExpression()->getType());
8108           Address HB = CGF.Builder.CreateConstGEP(
8109               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8110                   LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
8111               TypeSize.getQuantity() - 1);
8112           PartialStruct.HighestElem = {
8113               std::numeric_limits<decltype(
8114                   PartialStruct.HighestElem.first)>::max(),
8115               HB};
8116           PartialStruct.Base = BP;
8117           PartialStruct.LB = LB;
8118           assert(
8119               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8120               "Overlapped elements must be used only once for the variable.");
8121           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8122           // Emit data for non-overlapped data.
8123           OpenMPOffloadMappingFlags Flags =
8124               OMP_MAP_MEMBER_OF |
8125               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8126                              /*AddPtrFlag=*/false,
8127                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8128           llvm::Value *Size = nullptr;
8129           // Do bitcopy of all non-overlapped structure elements.
8130           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8131                    Component : OverlappedElements) {
8132             Address ComponentLB = Address::invalid();
8133             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8134                  Component) {
8135               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8136                 const auto *FD = dyn_cast<FieldDecl>(VD);
8137                 if (FD && FD->getType()->isLValueReferenceType()) {
8138                   const auto *ME =
8139                       cast<MemberExpr>(MC.getAssociatedExpression());
8140                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8141                   ComponentLB =
8142                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
8143                           .getAddress(CGF);
8144                 } else {
8145                   ComponentLB =
8146                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
8147                           .getAddress(CGF);
8148                 }
8149                 Size = CGF.Builder.CreatePtrDiff(
8150                     CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
8151                     CGF.EmitCastToVoidPtr(LB.getPointer()));
8152                 break;
8153               }
8154             }
8155             assert(Size && "Failed to determine structure size");
8156             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8157             CombinedInfo.BasePointers.push_back(BP.getPointer());
8158             CombinedInfo.Pointers.push_back(LB.getPointer());
8159             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8160                 Size, CGF.Int64Ty, /*isSigned=*/true));
8161             CombinedInfo.Types.push_back(Flags);
8162             CombinedInfo.Mappers.push_back(nullptr);
8163             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8164                                                                       : 1);
8165             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
8166           }
8167           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8168           CombinedInfo.BasePointers.push_back(BP.getPointer());
8169           CombinedInfo.Pointers.push_back(LB.getPointer());
8170           Size = CGF.Builder.CreatePtrDiff(
8171               CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
8172               CGF.EmitCastToVoidPtr(LB.getPointer()));
8173           CombinedInfo.Sizes.push_back(
8174               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8175           CombinedInfo.Types.push_back(Flags);
8176           CombinedInfo.Mappers.push_back(nullptr);
8177           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8178                                                                     : 1);
8179           break;
8180         }
8181         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8182         if (!IsMemberPointerOrAddr ||
8183             (Next == CE && MapType != OMPC_MAP_unknown)) {
8184           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8185           CombinedInfo.BasePointers.push_back(BP.getPointer());
8186           CombinedInfo.Pointers.push_back(LB.getPointer());
8187           CombinedInfo.Sizes.push_back(
8188               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8189           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8190                                                                     : 1);
8191 
8192           // If Mapper is valid, the last component inherits the mapper.
8193           bool HasMapper = Mapper && Next == CE;
8194           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8195 
8196           // We need to add a pointer flag for each map that comes from the
8197           // same expression except for the first one. We also need to signal
8198           // this map is the first one that relates with the current capture
8199           // (there is a set of entries for each capture).
8200           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8201               MapType, MapModifiers, MotionModifiers, IsImplicit,
8202               !IsExpressionFirstInfo || RequiresReference ||
8203                   FirstPointerInComplexData || IsMemberReference,
8204               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8205 
8206           if (!IsExpressionFirstInfo || IsMemberReference) {
8207             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8208             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8209             if (IsPointer || (IsMemberReference && Next != CE))
8210               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
8211                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
8212 
8213             if (ShouldBeMemberOf) {
8214               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8215               // should be later updated with the correct value of MEMBER_OF.
8216               Flags |= OMP_MAP_MEMBER_OF;
8217               // From now on, all subsequent PTR_AND_OBJ entries should not be
8218               // marked as MEMBER_OF.
8219               ShouldBeMemberOf = false;
8220             }
8221           }
8222 
8223           CombinedInfo.Types.push_back(Flags);
8224         }
8225 
8226         // If we have encountered a member expression so far, keep track of the
8227         // mapped member. If the parent is "*this", then the value declaration
8228         // is nullptr.
8229         if (EncounteredME) {
8230           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8231           unsigned FieldIndex = FD->getFieldIndex();
8232 
8233           // Update info about the lowest and highest elements for this struct
8234           if (!PartialStruct.Base.isValid()) {
8235             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8236             if (IsFinalArraySection) {
8237               Address HB =
8238                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
8239                       .getAddress(CGF);
8240               PartialStruct.HighestElem = {FieldIndex, HB};
8241             } else {
8242               PartialStruct.HighestElem = {FieldIndex, LowestElem};
8243             }
8244             PartialStruct.Base = BP;
8245             PartialStruct.LB = BP;
8246           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8247             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8248           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8249             PartialStruct.HighestElem = {FieldIndex, LowestElem};
8250           }
8251         }
8252 
8253         // Need to emit combined struct for array sections.
8254         if (IsFinalArraySection || IsNonContiguous)
8255           PartialStruct.IsArraySection = true;
8256 
8257         // If we have a final array section, we are done with this expression.
8258         if (IsFinalArraySection)
8259           break;
8260 
8261         // The pointer becomes the base for the next element.
8262         if (Next != CE)
8263           BP = IsMemberReference ? LowestElem : LB;
8264 
8265         IsExpressionFirstInfo = false;
8266         IsCaptureFirstInfo = false;
8267         FirstPointerInComplexData = false;
8268         IsPrevMemberReference = IsMemberReference;
8269       } else if (FirstPointerInComplexData) {
8270         QualType Ty = Components.rbegin()
8271                           ->getAssociatedDeclaration()
8272                           ->getType()
8273                           .getNonReferenceType();
8274         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8275         FirstPointerInComplexData = false;
8276       }
8277     }
8278     // If ran into the whole component - allocate the space for the whole
8279     // record.
8280     if (!EncounteredME)
8281       PartialStruct.HasCompleteRecord = true;
8282 
8283     if (!IsNonContiguous)
8284       return;
8285 
8286     const ASTContext &Context = CGF.getContext();
8287 
8288     // For supporting stride in array section, we need to initialize the first
8289     // dimension size as 1, first offset as 0, and first count as 1
8290     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8291     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8292     MapValuesArrayTy CurStrides;
8293     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8294     uint64_t ElementTypeSize;
8295 
8296     // Collect Size information for each dimension and get the element size as
8297     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
8298     // should be [10, 10] and the first stride is 4 bytes.
8299     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8300          Components) {
8301       const Expr *AssocExpr = Component.getAssociatedExpression();
8302       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8303 
8304       if (!OASE)
8305         continue;
8306 
8307       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8308       auto *CAT = Context.getAsConstantArrayType(Ty);
8309       auto *VAT = Context.getAsVariableArrayType(Ty);
8310 
8311       // We need all the dimension size except for the last dimension.
8312       assert((VAT || CAT || &Component == &*Components.begin()) &&
8313              "Should be either ConstantArray or VariableArray if not the "
8314              "first Component");
8315 
8316       // Get element size if CurStrides is empty.
8317       if (CurStrides.empty()) {
8318         const Type *ElementType = nullptr;
8319         if (CAT)
8320           ElementType = CAT->getElementType().getTypePtr();
8321         else if (VAT)
8322           ElementType = VAT->getElementType().getTypePtr();
8323         else
8324           assert(&Component == &*Components.begin() &&
8325                  "Only expect pointer (non CAT or VAT) when this is the "
8326                  "first Component");
8327         // If ElementType is null, then it means the base is a pointer
8328         // (neither CAT nor VAT) and we'll attempt to get ElementType again
8329         // for next iteration.
8330         if (ElementType) {
8331           // For the case that having pointer as base, we need to remove one
8332           // level of indirection.
8333           if (&Component != &*Components.begin())
8334             ElementType = ElementType->getPointeeOrArrayElementType();
8335           ElementTypeSize =
8336               Context.getTypeSizeInChars(ElementType).getQuantity();
8337           CurStrides.push_back(
8338               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8339         }
8340       }
8341       // Get dimension value except for the last dimension since we don't need
8342       // it.
8343       if (DimSizes.size() < Components.size() - 1) {
8344         if (CAT)
8345           DimSizes.push_back(llvm::ConstantInt::get(
8346               CGF.Int64Ty, CAT->getSize().getZExtValue()));
8347         else if (VAT)
8348           DimSizes.push_back(CGF.Builder.CreateIntCast(
8349               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8350               /*IsSigned=*/false));
8351       }
8352     }
8353 
8354     // Skip the dummy dimension since we already have its information.
8355     auto *DI = DimSizes.begin() + 1;
8356     // Product of dimension.
8357     llvm::Value *DimProd =
8358         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8359 
8360     // Collect info for non-contiguous. Notice that offset, count, and stride
8361     // are only meaningful for array-section, so we insert a null for anything
8362     // other than array-section.
8363     // Also, the size of offset, count, and stride are not the same as
8364     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8365     // count, and stride are the same as the number of non-contiguous
8366     // declaration in target update to/from clause.
8367     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8368          Components) {
8369       const Expr *AssocExpr = Component.getAssociatedExpression();
8370 
8371       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8372         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8373             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8374             /*isSigned=*/false);
8375         CurOffsets.push_back(Offset);
8376         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8377         CurStrides.push_back(CurStrides.back());
8378         continue;
8379       }
8380 
8381       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8382 
8383       if (!OASE)
8384         continue;
8385 
8386       // Offset
8387       const Expr *OffsetExpr = OASE->getLowerBound();
8388       llvm::Value *Offset = nullptr;
8389       if (!OffsetExpr) {
8390         // If offset is absent, then we just set it to zero.
8391         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8392       } else {
8393         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8394                                            CGF.Int64Ty,
8395                                            /*isSigned=*/false);
8396       }
8397       CurOffsets.push_back(Offset);
8398 
8399       // Count
8400       const Expr *CountExpr = OASE->getLength();
8401       llvm::Value *Count = nullptr;
8402       if (!CountExpr) {
8403         // In Clang, once a high dimension is an array section, we construct all
8404         // the lower dimension as array section, however, for case like
8405         // arr[0:2][2], Clang construct the inner dimension as an array section
8406         // but it actually is not in an array section form according to spec.
8407         if (!OASE->getColonLocFirst().isValid() &&
8408             !OASE->getColonLocSecond().isValid()) {
8409           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8410         } else {
8411           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8412           // When the length is absent it defaults to ⌈(size −
8413           // lower-bound)/stride⌉, where size is the size of the array
8414           // dimension.
8415           const Expr *StrideExpr = OASE->getStride();
8416           llvm::Value *Stride =
8417               StrideExpr
8418                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8419                                               CGF.Int64Ty, /*isSigned=*/false)
8420                   : nullptr;
8421           if (Stride)
8422             Count = CGF.Builder.CreateUDiv(
8423                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8424           else
8425             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8426         }
8427       } else {
8428         Count = CGF.EmitScalarExpr(CountExpr);
8429       }
8430       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8431       CurCounts.push_back(Count);
8432 
8433       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8434       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8435       //              Offset      Count     Stride
8436       //    D0          0           1         4    (int)    <- dummy dimension
8437       //    D1          0           2         8    (2 * (1) * 4)
8438       //    D2          1           2         20   (1 * (1 * 5) * 4)
8439       //    D3          0           2         200  (2 * (1 * 5 * 5) * 4)
8440       const Expr *StrideExpr = OASE->getStride();
8441       llvm::Value *Stride =
8442           StrideExpr
8443               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8444                                           CGF.Int64Ty, /*isSigned=*/false)
8445               : nullptr;
8446       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8447       if (Stride)
8448         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8449       else
8450         CurStrides.push_back(DimProd);
8451       if (DI != DimSizes.end())
8452         ++DI;
8453     }
8454 
8455     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8456     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8457     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8458   }
8459 
8460   /// Return the adjusted map modifiers if the declaration a capture refers to
8461   /// appears in a first-private clause. This is expected to be used only with
8462   /// directives that start with 'target'.
8463   MappableExprsHandler::OpenMPOffloadMappingFlags
8464   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8465     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8466 
8467     // A first private variable captured by reference will use only the
8468     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8469     // declaration is known as first-private in this handler.
8470     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8471       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8472         return MappableExprsHandler::OMP_MAP_TO |
8473                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8474       return MappableExprsHandler::OMP_MAP_PRIVATE |
8475              MappableExprsHandler::OMP_MAP_TO;
8476     }
8477     auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8478     if (I != LambdasMap.end())
8479       // for map(to: lambda): using user specified map type.
8480       return getMapTypeBits(
8481           I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8482           /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
8483           /*AddPtrFlag=*/false,
8484           /*AddIsTargetParamFlag=*/false,
8485           /*isNonContiguous=*/false);
8486     return MappableExprsHandler::OMP_MAP_TO |
8487            MappableExprsHandler::OMP_MAP_FROM;
8488   }
8489 
8490   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8491     // Rotate by getFlagMemberOffset() bits.
8492     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8493                                                   << getFlagMemberOffset());
8494   }
8495 
8496   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8497                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8498     // If the entry is PTR_AND_OBJ but has not been marked with the special
8499     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8500     // marked as MEMBER_OF.
8501     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8502         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8503       return;
8504 
8505     // Reset the placeholder value to prepare the flag for the assignment of the
8506     // proper MEMBER_OF value.
8507     Flags &= ~OMP_MAP_MEMBER_OF;
8508     Flags |= MemberOfFlag;
8509   }
8510 
8511   void getPlainLayout(const CXXRecordDecl *RD,
8512                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8513                       bool AsBase) const {
8514     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8515 
8516     llvm::StructType *St =
8517         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8518 
8519     unsigned NumElements = St->getNumElements();
8520     llvm::SmallVector<
8521         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8522         RecordLayout(NumElements);
8523 
8524     // Fill bases.
8525     for (const auto &I : RD->bases()) {
8526       if (I.isVirtual())
8527         continue;
8528       const auto *Base = I.getType()->getAsCXXRecordDecl();
8529       // Ignore empty bases.
8530       if (Base->isEmpty() || CGF.getContext()
8531                                  .getASTRecordLayout(Base)
8532                                  .getNonVirtualSize()
8533                                  .isZero())
8534         continue;
8535 
8536       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8537       RecordLayout[FieldIndex] = Base;
8538     }
8539     // Fill in virtual bases.
8540     for (const auto &I : RD->vbases()) {
8541       const auto *Base = I.getType()->getAsCXXRecordDecl();
8542       // Ignore empty bases.
8543       if (Base->isEmpty())
8544         continue;
8545       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8546       if (RecordLayout[FieldIndex])
8547         continue;
8548       RecordLayout[FieldIndex] = Base;
8549     }
8550     // Fill in all the fields.
8551     assert(!RD->isUnion() && "Unexpected union.");
8552     for (const auto *Field : RD->fields()) {
8553       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8554       // will fill in later.)
8555       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8556         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8557         RecordLayout[FieldIndex] = Field;
8558       }
8559     }
8560     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8561              &Data : RecordLayout) {
8562       if (Data.isNull())
8563         continue;
8564       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8565         getPlainLayout(Base, Layout, /*AsBase=*/true);
8566       else
8567         Layout.push_back(Data.get<const FieldDecl *>());
8568     }
8569   }
8570 
8571   /// Generate all the base pointers, section pointers, sizes, map types, and
8572   /// mappers for the extracted mappable expressions (all included in \a
8573   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8574   /// pair of the relevant declaration and index where it occurs is appended to
8575   /// the device pointers info array.
8576   void generateAllInfoForClauses(
8577       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8578       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8579           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8580     // We have to process the component lists that relate with the same
8581     // declaration in a single chunk so that we can generate the map flags
8582     // correctly. Therefore, we organize all lists in a map.
8583     enum MapKind { Present, Allocs, Other, Total };
8584     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8585                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8586         Info;
8587 
8588     // Helper function to fill the information map for the different supported
8589     // clauses.
8590     auto &&InfoGen =
8591         [&Info, &SkipVarSet](
8592             const ValueDecl *D, MapKind Kind,
8593             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8594             OpenMPMapClauseKind MapType,
8595             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8596             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8597             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8598             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8599           if (SkipVarSet.contains(D))
8600             return;
8601           auto It = Info.find(D);
8602           if (It == Info.end())
8603             It = Info
8604                      .insert(std::make_pair(
8605                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8606                      .first;
8607           It->second[Kind].emplace_back(
8608               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8609               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8610         };
8611 
8612     for (const auto *Cl : Clauses) {
8613       const auto *C = dyn_cast<OMPMapClause>(Cl);
8614       if (!C)
8615         continue;
8616       MapKind Kind = Other;
8617       if (llvm::is_contained(C->getMapTypeModifiers(),
8618                              OMPC_MAP_MODIFIER_present))
8619         Kind = Present;
8620       else if (C->getMapType() == OMPC_MAP_alloc)
8621         Kind = Allocs;
8622       const auto *EI = C->getVarRefs().begin();
8623       for (const auto L : C->component_lists()) {
8624         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8625         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8626                 C->getMapTypeModifiers(), llvm::None,
8627                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8628                 E);
8629         ++EI;
8630       }
8631     }
8632     for (const auto *Cl : Clauses) {
8633       const auto *C = dyn_cast<OMPToClause>(Cl);
8634       if (!C)
8635         continue;
8636       MapKind Kind = Other;
8637       if (llvm::is_contained(C->getMotionModifiers(),
8638                              OMPC_MOTION_MODIFIER_present))
8639         Kind = Present;
8640       const auto *EI = C->getVarRefs().begin();
8641       for (const auto L : C->component_lists()) {
8642         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8643                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8644                 C->isImplicit(), std::get<2>(L), *EI);
8645         ++EI;
8646       }
8647     }
8648     for (const auto *Cl : Clauses) {
8649       const auto *C = dyn_cast<OMPFromClause>(Cl);
8650       if (!C)
8651         continue;
8652       MapKind Kind = Other;
8653       if (llvm::is_contained(C->getMotionModifiers(),
8654                              OMPC_MOTION_MODIFIER_present))
8655         Kind = Present;
8656       const auto *EI = C->getVarRefs().begin();
8657       for (const auto L : C->component_lists()) {
8658         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8659                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8660                 C->isImplicit(), std::get<2>(L), *EI);
8661         ++EI;
8662       }
8663     }
8664 
8665     // Look at the use_device_ptr clause information and mark the existing map
8666     // entries as such. If there is no map information for an entry in the
8667     // use_device_ptr list, we create one with map type 'alloc' and zero size
8668     // section. It is the user fault if that was not mapped before. If there is
8669     // no map information and the pointer is a struct member, then we defer the
8670     // emission of that entry until the whole struct has been processed.
8671     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8672                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8673         DeferredInfo;
8674     MapCombinedInfoTy UseDevicePtrCombinedInfo;
8675 
8676     for (const auto *Cl : Clauses) {
8677       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8678       if (!C)
8679         continue;
8680       for (const auto L : C->component_lists()) {
8681         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8682             std::get<1>(L);
8683         assert(!Components.empty() &&
8684                "Not expecting empty list of components!");
8685         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8686         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8687         const Expr *IE = Components.back().getAssociatedExpression();
8688         // If the first component is a member expression, we have to look into
8689         // 'this', which maps to null in the map of map information. Otherwise
8690         // look directly for the information.
8691         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8692 
8693         // We potentially have map information for this declaration already.
8694         // Look for the first set of components that refer to it.
8695         if (It != Info.end()) {
8696           bool Found = false;
8697           for (auto &Data : It->second) {
8698             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8699               return MI.Components.back().getAssociatedDeclaration() == VD;
8700             });
8701             // If we found a map entry, signal that the pointer has to be
8702             // returned and move on to the next declaration. Exclude cases where
8703             // the base pointer is mapped as array subscript, array section or
8704             // array shaping. The base address is passed as a pointer to base in
8705             // this case and cannot be used as a base for use_device_ptr list
8706             // item.
8707             if (CI != Data.end()) {
8708               auto PrevCI = std::next(CI->Components.rbegin());
8709               const auto *VarD = dyn_cast<VarDecl>(VD);
8710               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8711                   isa<MemberExpr>(IE) ||
8712                   !VD->getType().getNonReferenceType()->isPointerType() ||
8713                   PrevCI == CI->Components.rend() ||
8714                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8715                   VarD->hasLocalStorage()) {
8716                 CI->ReturnDevicePointer = true;
8717                 Found = true;
8718                 break;
8719               }
8720             }
8721           }
8722           if (Found)
8723             continue;
8724         }
8725 
8726         // We didn't find any match in our map information - generate a zero
8727         // size array section - if the pointer is a struct member we defer this
8728         // action until the whole struct has been processed.
8729         if (isa<MemberExpr>(IE)) {
8730           // Insert the pointer into Info to be processed by
8731           // generateInfoForComponentList. Because it is a member pointer
8732           // without a pointee, no entry will be generated for it, therefore
8733           // we need to generate one after the whole struct has been processed.
8734           // Nonetheless, generateInfoForComponentList must be called to take
8735           // the pointer into account for the calculation of the range of the
8736           // partial struct.
8737           InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8738                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8739                   nullptr);
8740           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8741         } else {
8742           llvm::Value *Ptr =
8743               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8744           UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8745           UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8746           UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8747           UseDevicePtrCombinedInfo.Sizes.push_back(
8748               llvm::Constant::getNullValue(CGF.Int64Ty));
8749           UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8750           UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8751         }
8752       }
8753     }
8754 
    // Look at the use_device_addr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_addr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
8761     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8762     for (const auto *Cl : Clauses) {
8763       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8764       if (!C)
8765         continue;
8766       for (const auto L : C->component_lists()) {
8767         assert(!std::get<1>(L).empty() &&
8768                "Not expecting empty list of components!");
8769         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8770         if (!Processed.insert(VD).second)
8771           continue;
8772         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8773         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8774         // If the first component is a member expression, we have to look into
8775         // 'this', which maps to null in the map of map information. Otherwise
8776         // look directly for the information.
8777         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8778 
8779         // We potentially have map information for this declaration already.
8780         // Look for the first set of components that refer to it.
8781         if (It != Info.end()) {
8782           bool Found = false;
8783           for (auto &Data : It->second) {
8784             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8785               return MI.Components.back().getAssociatedDeclaration() == VD;
8786             });
8787             // If we found a map entry, signal that the pointer has to be
8788             // returned and move on to the next declaration.
8789             if (CI != Data.end()) {
8790               CI->ReturnDevicePointer = true;
8791               Found = true;
8792               break;
8793             }
8794           }
8795           if (Found)
8796             continue;
8797         }
8798 
8799         // We didn't find any match in our map information - generate a zero
8800         // size array section - if the pointer is a struct member we defer this
8801         // action until the whole struct has been processed.
8802         if (isa<MemberExpr>(IE)) {
8803           // Insert the pointer into Info to be processed by
8804           // generateInfoForComponentList. Because it is a member pointer
8805           // without a pointee, no entry will be generated for it, therefore
8806           // we need to generate one after the whole struct has been processed.
8807           // Nonetheless, generateInfoForComponentList must be called to take
8808           // the pointer into account for the calculation of the range of the
8809           // partial struct.
8810           InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8811                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8812                   nullptr, nullptr, /*ForDeviceAddr=*/true);
8813           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8814         } else {
8815           llvm::Value *Ptr;
8816           if (IE->isGLValue())
8817             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8818           else
8819             Ptr = CGF.EmitScalarExpr(IE);
8820           CombinedInfo.Exprs.push_back(VD);
8821           CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8822           CombinedInfo.Pointers.push_back(Ptr);
8823           CombinedInfo.Sizes.push_back(
8824               llvm::Constant::getNullValue(CGF.Int64Ty));
8825           CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8826           CombinedInfo.Mappers.push_back(nullptr);
8827         }
8828       }
8829     }
8830 
8831     for (const auto &Data : Info) {
8832       StructRangeInfoTy PartialStruct;
8833       // Temporary generated information.
8834       MapCombinedInfoTy CurInfo;
8835       const Decl *D = Data.first;
8836       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8837       for (const auto &M : Data.second) {
8838         for (const MapInfo &L : M) {
8839           assert(!L.Components.empty() &&
8840                  "Not expecting declaration with no component lists.");
8841 
8842           // Remember the current base pointer index.
8843           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8844           CurInfo.NonContigInfo.IsNonContiguous =
8845               L.Components.back().isNonContiguous();
8846           generateInfoForComponentList(
8847               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8848               CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8849               L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8850 
8851           // If this entry relates with a device pointer, set the relevant
8852           // declaration and add the 'return pointer' flag.
8853           if (L.ReturnDevicePointer) {
8854             assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8855                    "Unexpected number of mapped base pointers.");
8856 
8857             const ValueDecl *RelevantVD =
8858                 L.Components.back().getAssociatedDeclaration();
8859             assert(RelevantVD &&
8860                    "No relevant declaration related with device pointer??");
8861 
8862             CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8863                 RelevantVD);
8864             CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8865           }
8866         }
8867       }
8868 
8869       // Append any pending zero-length pointers which are struct members and
8870       // used with use_device_ptr or use_device_addr.
8871       auto CI = DeferredInfo.find(Data.first);
8872       if (CI != DeferredInfo.end()) {
8873         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8874           llvm::Value *BasePtr;
8875           llvm::Value *Ptr;
8876           if (L.ForDeviceAddr) {
8877             if (L.IE->isGLValue())
8878               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8879             else
8880               Ptr = this->CGF.EmitScalarExpr(L.IE);
8881             BasePtr = Ptr;
8882             // Entry is RETURN_PARAM. Also, set the placeholder value
8883             // MEMBER_OF=FFFF so that the entry is later updated with the
8884             // correct value of MEMBER_OF.
8885             CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8886           } else {
8887             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8888             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8889                                              L.IE->getExprLoc());
8890             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8891             // placeholder value MEMBER_OF=FFFF so that the entry is later
8892             // updated with the correct value of MEMBER_OF.
8893             CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8894                                     OMP_MAP_MEMBER_OF);
8895           }
8896           CurInfo.Exprs.push_back(L.VD);
8897           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8898           CurInfo.Pointers.push_back(Ptr);
8899           CurInfo.Sizes.push_back(
8900               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8901           CurInfo.Mappers.push_back(nullptr);
8902         }
8903       }
8904       // If there is an entry in PartialStruct it means we have a struct with
8905       // individual members mapped. Emit an extra combined entry.
8906       if (PartialStruct.Base.isValid()) {
8907         CurInfo.NonContigInfo.Dims.push_back(0);
8908         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8909       }
8910 
8911       // We need to append the results of this capture to what we already
8912       // have.
8913       CombinedInfo.append(CurInfo);
8914     }
8915     // Append data for use_device_ptr clauses.
8916     CombinedInfo.append(UseDevicePtrCombinedInfo);
8917   }
8918 
8919 public:
8920   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8921       : CurDir(&Dir), CGF(CGF) {
8922     // Extract firstprivate clause information.
8923     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8924       for (const auto *D : C->varlists())
8925         FirstPrivateDecls.try_emplace(
8926             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8927     // Extract implicit firstprivates from uses_allocators clauses.
8928     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8929       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8930         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8931         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8932           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8933                                         /*Implicit=*/true);
8934         else if (const auto *VD = dyn_cast<VarDecl>(
8935                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8936                          ->getDecl()))
8937           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8938       }
8939     }
8940     // Extract device pointer clause information.
8941     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8942       for (auto L : C->component_lists())
8943         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8944     // Extract map information.
8945     for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8946       if (C->getMapType() != OMPC_MAP_to)
8947         continue;
8948       for (auto L : C->component_lists()) {
8949         const ValueDecl *VD = std::get<0>(L);
8950         const auto *RD = VD ? VD->getType()
8951                                   .getCanonicalType()
8952                                   .getNonReferenceType()
8953                                   ->getAsCXXRecordDecl()
8954                             : nullptr;
8955         if (RD && RD->isLambda())
8956           LambdasMap.try_emplace(std::get<0>(L), C);
8957       }
8958     }
8959   }
8960 
  /// Constructor for the declare mapper directive. Only records the directive
  /// and the code generation function; unlike the executable-directive
  /// constructor it does not scan any clauses up front.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8964 
8965   /// Generate code for the combined entry if we have a partially mapped struct
8966   /// and take care of the mapping flags of the arguments corresponding to
8967   /// individual struct members.
8968   void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8969                          MapFlagsArrayTy &CurTypes,
8970                          const StructRangeInfoTy &PartialStruct,
8971                          const ValueDecl *VD = nullptr,
8972                          bool NotTargetParams = true) const {
8973     if (CurTypes.size() == 1 &&
8974         ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
8975         !PartialStruct.IsArraySection)
8976       return;
8977     Address LBAddr = PartialStruct.LowestElem.second;
8978     Address HBAddr = PartialStruct.HighestElem.second;
8979     if (PartialStruct.HasCompleteRecord) {
8980       LBAddr = PartialStruct.LB;
8981       HBAddr = PartialStruct.LB;
8982     }
8983     CombinedInfo.Exprs.push_back(VD);
8984     // Base is the base of the struct
8985     CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8986     // Pointer is the address of the lowest element
8987     llvm::Value *LB = LBAddr.getPointer();
8988     CombinedInfo.Pointers.push_back(LB);
8989     // There should not be a mapper for a combined entry.
8990     CombinedInfo.Mappers.push_back(nullptr);
8991     // Size is (addr of {highest+1} element) - (addr of lowest element)
8992     llvm::Value *HB = HBAddr.getPointer();
8993     llvm::Value *HAddr =
8994         CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
8995     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8996     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8997     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8998     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8999                                                   /*isSigned=*/false);
9000     CombinedInfo.Sizes.push_back(Size);
9001     // Map type is always TARGET_PARAM, if generate info for captures.
9002     CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
9003                                                  : OMP_MAP_TARGET_PARAM);
9004     // If any element has the present modifier, then make sure the runtime
9005     // doesn't attempt to allocate the struct.
9006     if (CurTypes.end() !=
9007         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
9008           return Type & OMP_MAP_PRESENT;
9009         }))
9010       CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
9011     // Remove TARGET_PARAM flag from the first element
9012     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
9013     // If any element has the ompx_hold modifier, then make sure the runtime
9014     // uses the hold reference count for the struct as a whole so that it won't
9015     // be unmapped by an extra dynamic reference count decrement.  Add it to all
9016     // elements as well so the runtime knows which reference count to check
9017     // when determining whether it's time for device-to-host transfers of
9018     // individual elements.
9019     if (CurTypes.end() !=
9020         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
9021           return Type & OMP_MAP_OMPX_HOLD;
9022         })) {
9023       CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
9024       for (auto &M : CurTypes)
9025         M |= OMP_MAP_OMPX_HOLD;
9026     }
9027 
9028     // All other current entries will be MEMBER_OF the combined entry
9029     // (except for PTR_AND_OBJ entries which do not have a placeholder value
9030     // 0xFFFF in the MEMBER_OF field).
9031     OpenMPOffloadMappingFlags MemberOfFlag =
9032         getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
9033     for (auto &M : CurTypes)
9034       setCorrectMemberOfFlag(M, MemberOfFlag);
9035   }
9036 
9037   /// Generate all the base pointers, section pointers, sizes, map types, and
9038   /// mappers for the extracted mappable expressions (all included in \a
9039   /// CombinedInfo). Also, for each item that relates with a device pointer, a
9040   /// pair of the relevant declaration and index where it occurs is appended to
9041   /// the device pointers info array.
9042   void generateAllInfo(
9043       MapCombinedInfoTy &CombinedInfo,
9044       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9045           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9046     assert(CurDir.is<const OMPExecutableDirective *>() &&
9047            "Expect a executable directive");
9048     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9049     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
9050   }
9051 
9052   /// Generate all the base pointers, section pointers, sizes, map types, and
9053   /// mappers for the extracted map clauses of user-defined mapper (all included
9054   /// in \a CombinedInfo).
9055   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
9056     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
9057            "Expect a declare mapper directive");
9058     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
9059     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
9060   }
9061 
9062   /// Emit capture info for lambdas for variables captured by reference.
9063   void generateInfoForLambdaCaptures(
9064       const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9065       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
9066     const auto *RD = VD->getType()
9067                          .getCanonicalType()
9068                          .getNonReferenceType()
9069                          ->getAsCXXRecordDecl();
9070     if (!RD || !RD->isLambda())
9071       return;
9072     Address VDAddr =
9073         Address::deprecated(Arg, CGF.getContext().getDeclAlign(VD));
9074     LValue VDLVal = CGF.MakeAddrLValue(
9075         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
9076     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
9077     FieldDecl *ThisCapture = nullptr;
9078     RD->getCaptureFields(Captures, ThisCapture);
9079     if (ThisCapture) {
9080       LValue ThisLVal =
9081           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
9082       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
9083       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
9084                                  VDLVal.getPointer(CGF));
9085       CombinedInfo.Exprs.push_back(VD);
9086       CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
9087       CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
9088       CombinedInfo.Sizes.push_back(
9089           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
9090                                     CGF.Int64Ty, /*isSigned=*/true));
9091       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9092                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9093       CombinedInfo.Mappers.push_back(nullptr);
9094     }
9095     for (const LambdaCapture &LC : RD->captures()) {
9096       if (!LC.capturesVariable())
9097         continue;
9098       const VarDecl *VD = LC.getCapturedVar();
9099       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
9100         continue;
9101       auto It = Captures.find(VD);
9102       assert(It != Captures.end() && "Found lambda capture without field.");
9103       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
9104       if (LC.getCaptureKind() == LCK_ByRef) {
9105         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
9106         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9107                                    VDLVal.getPointer(CGF));
9108         CombinedInfo.Exprs.push_back(VD);
9109         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9110         CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
9111         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9112             CGF.getTypeSize(
9113                 VD->getType().getCanonicalType().getNonReferenceType()),
9114             CGF.Int64Ty, /*isSigned=*/true));
9115       } else {
9116         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
9117         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9118                                    VDLVal.getPointer(CGF));
9119         CombinedInfo.Exprs.push_back(VD);
9120         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9121         CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
9122         CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
9123       }
9124       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9125                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9126       CombinedInfo.Mappers.push_back(nullptr);
9127     }
9128   }
9129 
9130   /// Set correct indices for lambdas captures.
9131   void adjustMemberOfForLambdaCaptures(
9132       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9133       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9134       MapFlagsArrayTy &Types) const {
9135     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9136       // Set correct member_of idx for all implicit lambda captures.
9137       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9138                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9139         continue;
9140       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9141       assert(BasePtr && "Unable to find base lambda address.");
9142       int TgtIdx = -1;
9143       for (unsigned J = I; J > 0; --J) {
9144         unsigned Idx = J - 1;
9145         if (Pointers[Idx] != BasePtr)
9146           continue;
9147         TgtIdx = Idx;
9148         break;
9149       }
9150       assert(TgtIdx != -1 && "Unable to find parent lambda.");
9151       // All other current entries will be MEMBER_OF the combined entry
9152       // (except for PTR_AND_OBJ entries which do not have a placeholder value
9153       // 0xFFFF in the MEMBER_OF field).
9154       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9155       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9156     }
9157   }
9158 
9159   /// Generate the base pointers, section pointers, sizes, map types, and
9160   /// mappers associated to a given capture (all included in \a CombinedInfo).
9161   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
9162                               llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9163                               StructRangeInfoTy &PartialStruct) const {
9164     assert(!Cap->capturesVariableArrayType() &&
9165            "Not expecting to generate map info for a variable array type!");
9166 
9167     // We need to know when we generating information for the first component
9168     const ValueDecl *VD = Cap->capturesThis()
9169                               ? nullptr
9170                               : Cap->getCapturedVar()->getCanonicalDecl();
9171 
9172     // for map(to: lambda): skip here, processing it in
9173     // generateDefaultMapInfo
9174     if (LambdasMap.count(VD))
9175       return;
9176 
9177     // If this declaration appears in a is_device_ptr clause we just have to
9178     // pass the pointer by value. If it is a reference to a declaration, we just
9179     // pass its value.
9180     if (DevPointersMap.count(VD)) {
9181       CombinedInfo.Exprs.push_back(VD);
9182       CombinedInfo.BasePointers.emplace_back(Arg, VD);
9183       CombinedInfo.Pointers.push_back(Arg);
9184       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9185           CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9186           /*isSigned=*/true));
9187       CombinedInfo.Types.push_back(
9188           (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
9189           OMP_MAP_TARGET_PARAM);
9190       CombinedInfo.Mappers.push_back(nullptr);
9191       return;
9192     }
9193 
9194     using MapData =
9195         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
9196                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
9197                    const ValueDecl *, const Expr *>;
9198     SmallVector<MapData, 4> DeclComponentLists;
9199     assert(CurDir.is<const OMPExecutableDirective *>() &&
9200            "Expect a executable directive");
9201     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9202     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9203       const auto *EI = C->getVarRefs().begin();
9204       for (const auto L : C->decl_component_lists(VD)) {
9205         const ValueDecl *VDecl, *Mapper;
9206         // The Expression is not correct if the mapping is implicit
9207         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9208         OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9209         std::tie(VDecl, Components, Mapper) = L;
9210         assert(VDecl == VD && "We got information for the wrong declaration??");
9211         assert(!Components.empty() &&
9212                "Not expecting declaration with no component lists.");
9213         DeclComponentLists.emplace_back(Components, C->getMapType(),
9214                                         C->getMapTypeModifiers(),
9215                                         C->isImplicit(), Mapper, E);
9216         ++EI;
9217       }
9218     }
9219     llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9220                                              const MapData &RHS) {
9221       ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9222       OpenMPMapClauseKind MapType = std::get<1>(RHS);
9223       bool HasPresent =
9224           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9225       bool HasAllocs = MapType == OMPC_MAP_alloc;
9226       MapModifiers = std::get<2>(RHS);
9227       MapType = std::get<1>(LHS);
9228       bool HasPresentR =
9229           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9230       bool HasAllocsR = MapType == OMPC_MAP_alloc;
9231       return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9232     });
9233 
9234     // Find overlapping elements (including the offset from the base element).
9235     llvm::SmallDenseMap<
9236         const MapData *,
9237         llvm::SmallVector<
9238             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9239         4>
9240         OverlappedData;
9241     size_t Count = 0;
9242     for (const MapData &L : DeclComponentLists) {
9243       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9244       OpenMPMapClauseKind MapType;
9245       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9246       bool IsImplicit;
9247       const ValueDecl *Mapper;
9248       const Expr *VarRef;
9249       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9250           L;
9251       ++Count;
9252       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
9253         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9254         std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9255                  VarRef) = L1;
9256         auto CI = Components.rbegin();
9257         auto CE = Components.rend();
9258         auto SI = Components1.rbegin();
9259         auto SE = Components1.rend();
9260         for (; CI != CE && SI != SE; ++CI, ++SI) {
9261           if (CI->getAssociatedExpression()->getStmtClass() !=
9262               SI->getAssociatedExpression()->getStmtClass())
9263             break;
9264           // Are we dealing with different variables/fields?
9265           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9266             break;
9267         }
9268         // Found overlapping if, at least for one component, reached the head
9269         // of the components list.
9270         if (CI == CE || SI == SE) {
9271           // Ignore it if it is the same component.
9272           if (CI == CE && SI == SE)
9273             continue;
9274           const auto It = (SI == SE) ? CI : SI;
9275           // If one component is a pointer and another one is a kind of
9276           // dereference of this pointer (array subscript, section, dereference,
9277           // etc.), it is not an overlapping.
9278           // Same, if one component is a base and another component is a
9279           // dereferenced pointer memberexpr with the same base.
9280           if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9281               (std::prev(It)->getAssociatedDeclaration() &&
9282                std::prev(It)
9283                    ->getAssociatedDeclaration()
9284                    ->getType()
9285                    ->isPointerType()) ||
9286               (It->getAssociatedDeclaration() &&
9287                It->getAssociatedDeclaration()->getType()->isPointerType() &&
9288                std::next(It) != CE && std::next(It) != SE))
9289             continue;
9290           const MapData &BaseData = CI == CE ? L : L1;
9291           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9292               SI == SE ? Components : Components1;
9293           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
9294           OverlappedElements.getSecond().push_back(SubData);
9295         }
9296       }
9297     }
9298     // Sort the overlapped elements for each item.
9299     llvm::SmallVector<const FieldDecl *, 4> Layout;
9300     if (!OverlappedData.empty()) {
9301       const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9302       const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9303       while (BaseType != OrigType) {
9304         BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9305         OrigType = BaseType->getPointeeOrArrayElementType();
9306       }
9307 
9308       if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9309         getPlainLayout(CRD, Layout, /*AsBase=*/false);
9310       else {
9311         const auto *RD = BaseType->getAsRecordDecl();
9312         Layout.append(RD->field_begin(), RD->field_end());
9313       }
9314     }
9315     for (auto &Pair : OverlappedData) {
9316       llvm::stable_sort(
9317           Pair.getSecond(),
9318           [&Layout](
9319               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9320               OMPClauseMappableExprCommon::MappableExprComponentListRef
9321                   Second) {
9322             auto CI = First.rbegin();
9323             auto CE = First.rend();
9324             auto SI = Second.rbegin();
9325             auto SE = Second.rend();
9326             for (; CI != CE && SI != SE; ++CI, ++SI) {
9327               if (CI->getAssociatedExpression()->getStmtClass() !=
9328                   SI->getAssociatedExpression()->getStmtClass())
9329                 break;
9330               // Are we dealing with different variables/fields?
9331               if (CI->getAssociatedDeclaration() !=
9332                   SI->getAssociatedDeclaration())
9333                 break;
9334             }
9335 
9336             // Lists contain the same elements.
9337             if (CI == CE && SI == SE)
9338               return false;
9339 
9340             // List with less elements is less than list with more elements.
9341             if (CI == CE || SI == SE)
9342               return CI == CE;
9343 
9344             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9345             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9346             if (FD1->getParent() == FD2->getParent())
9347               return FD1->getFieldIndex() < FD2->getFieldIndex();
9348             const auto *It =
9349                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9350                   return FD == FD1 || FD == FD2;
9351                 });
9352             return *It == FD1;
9353           });
9354     }
9355 
9356     // Associated with a capture, because the mapping flags depend on it.
9357     // Go through all of the elements with the overlapped elements.
9358     bool IsFirstComponentList = true;
9359     for (const auto &Pair : OverlappedData) {
9360       const MapData &L = *Pair.getFirst();
9361       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9362       OpenMPMapClauseKind MapType;
9363       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9364       bool IsImplicit;
9365       const ValueDecl *Mapper;
9366       const Expr *VarRef;
9367       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9368           L;
9369       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9370           OverlappedComponents = Pair.getSecond();
9371       generateInfoForComponentList(
9372           MapType, MapModifiers, llvm::None, Components, CombinedInfo,
9373           PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
9374           /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
9375       IsFirstComponentList = false;
9376     }
9377     // Go through other elements without overlapped elements.
9378     for (const MapData &L : DeclComponentLists) {
9379       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9380       OpenMPMapClauseKind MapType;
9381       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9382       bool IsImplicit;
9383       const ValueDecl *Mapper;
9384       const Expr *VarRef;
9385       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9386           L;
9387       auto It = OverlappedData.find(&L);
9388       if (It == OverlappedData.end())
9389         generateInfoForComponentList(MapType, MapModifiers, llvm::None,
9390                                      Components, CombinedInfo, PartialStruct,
9391                                      IsFirstComponentList, IsImplicit, Mapper,
9392                                      /*ForDeviceAddr=*/false, VD, VarRef);
9393       IsFirstComponentList = false;
9394     }
9395   }
9396 
9397   /// Generate the default map information for a given capture \a CI,
9398   /// record field declaration \a RI and captured value \a CV.
9399   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
9400                               const FieldDecl &RI, llvm::Value *CV,
9401                               MapCombinedInfoTy &CombinedInfo) const {
9402     bool IsImplicit = true;
9403     // Do the default mapping.
9404     if (CI.capturesThis()) {
9405       CombinedInfo.Exprs.push_back(nullptr);
9406       CombinedInfo.BasePointers.push_back(CV);
9407       CombinedInfo.Pointers.push_back(CV);
9408       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
9409       CombinedInfo.Sizes.push_back(
9410           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
9411                                     CGF.Int64Ty, /*isSigned=*/true));
9412       // Default map type.
9413       CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
9414     } else if (CI.capturesVariableByCopy()) {
9415       const VarDecl *VD = CI.getCapturedVar();
9416       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9417       CombinedInfo.BasePointers.push_back(CV);
9418       CombinedInfo.Pointers.push_back(CV);
9419       if (!RI.getType()->isAnyPointerType()) {
9420         // We have to signal to the runtime captures passed by value that are
9421         // not pointers.
9422         CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
9423         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9424             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
9425       } else {
9426         // Pointers are implicitly mapped with a zero size and no flags
9427         // (other than first map that is added for all implicit maps).
9428         CombinedInfo.Types.push_back(OMP_MAP_NONE);
9429         CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9430       }
9431       auto I = FirstPrivateDecls.find(VD);
9432       if (I != FirstPrivateDecls.end())
9433         IsImplicit = I->getSecond();
9434     } else {
9435       assert(CI.capturesVariable() && "Expected captured reference.");
9436       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9437       QualType ElementType = PtrTy->getPointeeType();
9438       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9439           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9440       // The default map type for a scalar/complex type is 'to' because by
9441       // default the value doesn't have to be retrieved. For an aggregate
9442       // type, the default is 'tofrom'.
9443       CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
9444       const VarDecl *VD = CI.getCapturedVar();
9445       auto I = FirstPrivateDecls.find(VD);
9446       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9447       CombinedInfo.BasePointers.push_back(CV);
9448       if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9449         Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9450             CV, ElementType, CGF.getContext().getDeclAlign(VD),
9451             AlignmentSource::Decl));
9452         CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
9453       } else {
9454         CombinedInfo.Pointers.push_back(CV);
9455       }
9456       if (I != FirstPrivateDecls.end())
9457         IsImplicit = I->getSecond();
9458     }
9459     // Every default map produces a single argument which is a target parameter.
9460     CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
9461 
9462     // Add flag stating this is an implicit map.
9463     if (IsImplicit)
9464       CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
9465 
9466     // No user-defined mapper for default mapping.
9467     CombinedInfo.Mappers.push_back(nullptr);
9468   }
9469 };
9470 } // anonymous namespace
9471 
9472 static void emitNonContiguousDescriptor(
9473     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9474     CGOpenMPRuntime::TargetDataInfo &Info) {
9475   CodeGenModule &CGM = CGF.CGM;
9476   MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
9477       &NonContigInfo = CombinedInfo.NonContigInfo;
9478 
9479   // Build an array of struct descriptor_dim and then assign it to
9480   // offload_args.
9481   //
9482   // struct descriptor_dim {
9483   //  uint64_t offset;
9484   //  uint64_t count;
9485   //  uint64_t stride
9486   // };
9487   ASTContext &C = CGF.getContext();
9488   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9489   RecordDecl *RD;
9490   RD = C.buildImplicitRecord("descriptor_dim");
9491   RD->startDefinition();
9492   addFieldToRecordDecl(C, RD, Int64Ty);
9493   addFieldToRecordDecl(C, RD, Int64Ty);
9494   addFieldToRecordDecl(C, RD, Int64Ty);
9495   RD->completeDefinition();
9496   QualType DimTy = C.getRecordType(RD);
9497 
9498   enum { OffsetFD = 0, CountFD, StrideFD };
9499   // We need two index variable here since the size of "Dims" is the same as the
9500   // size of Components, however, the size of offset, count, and stride is equal
9501   // to the size of base declaration that is non-contiguous.
9502   for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9503     // Skip emitting ir if dimension size is 1 since it cannot be
9504     // non-contiguous.
9505     if (NonContigInfo.Dims[I] == 1)
9506       continue;
9507     llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
9508     QualType ArrayTy =
9509         C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
9510     Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
9511     for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
9512       unsigned RevIdx = EE - II - 1;
9513       LValue DimsLVal = CGF.MakeAddrLValue(
9514           CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
9515       // Offset
9516       LValue OffsetLVal = CGF.EmitLValueForField(
9517           DimsLVal, *std::next(RD->field_begin(), OffsetFD));
9518       CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
9519       // Count
9520       LValue CountLVal = CGF.EmitLValueForField(
9521           DimsLVal, *std::next(RD->field_begin(), CountFD));
9522       CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
9523       // Stride
9524       LValue StrideLVal = CGF.EmitLValueForField(
9525           DimsLVal, *std::next(RD->field_begin(), StrideFD));
9526       CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
9527     }
9528     // args[I] = &dims
9529     Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9530         DimsAddr, CGM.Int8PtrTy, CGM.Int8Ty);
9531     llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9532         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9533         Info.PointersArray, 0, I);
9534     Address PAddr = Address::deprecated(P, CGF.getPointerAlign());
9535     CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
9536     ++L;
9537   }
9538 }
9539 
9540 // Try to extract the base declaration from a `this->x` expression if possible.
9541 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9542   if (!E)
9543     return nullptr;
9544 
9545   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
9546     if (const MemberExpr *ME =
9547             dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9548       return ME->getMemberDecl();
9549   return nullptr;
9550 }
9551 
9552 /// Emit a string constant containing the names of the values mapped to the
9553 /// offloading runtime library.
9554 llvm::Constant *
9555 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9556                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9557 
9558   uint32_t SrcLocStrSize;
9559   if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9560     return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
9561 
9562   SourceLocation Loc;
9563   if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9564     if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9565       Loc = VD->getLocation();
9566     else
9567       Loc = MapExprs.getMapExpr()->getExprLoc();
9568   } else {
9569     Loc = MapExprs.getMapDecl()->getLocation();
9570   }
9571 
9572   std::string ExprName;
9573   if (MapExprs.getMapExpr()) {
9574     PrintingPolicy P(CGF.getContext().getLangOpts());
9575     llvm::raw_string_ostream OS(ExprName);
9576     MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9577     OS.flush();
9578   } else {
9579     ExprName = MapExprs.getMapDecl()->getNameAsString();
9580   }
9581 
9582   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9583   return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
9584                                          PLoc.getLine(), PLoc.getColumn(),
9585                                          SrcLocStrSize);
9586 }
9587 
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library.
///
/// The results are recorded in \a Info: NumberOfPtrs is set to the number of
/// base pointers in \a CombinedInfo and, when that number is non-zero, the
/// BasePointersArray, PointersArray, MappersArray, SizesArray, MapTypesArray
/// and MapNamesArray fields are filled in (plus MapTypesArrayEnd, HasMapper
/// and CaptureDeviceAddrMap when applicable). When there is nothing to map,
/// only Info.clearArrayInfo() and the NumberOfPtrs assignment take effect.
///
/// \param IsNonContiguous If true, entries flagged OMP_MAP_NON_CONTIG get
/// their dimension count (NonContigInfo.Dims) in the constant sizes array, and
/// emitNonContiguousDescriptor is invoked at the end when
/// NonContigInfo.Offsets is not empty.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // The base pointers, pointers and mappers are always stored at run time
    // into stack temporaries holding NumberOfPtrs 'void *' elements each.

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    // ConstSizes holds the compile-time size of every entry (0 as placeholder
    // for the others); RuntimeSizes marks the entries whose size has to be
    // stored at run time.
    SmallVector<llvm::Constant *> ConstSizes(
        CombinedInfo.Sizes.size(), llvm::ConstantInt::get(CGF.Int64Ty, 0));
    llvm::SmallBitVector RuntimeSizes(CombinedInfo.Sizes.size());
    for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
      if (auto *CI = dyn_cast<llvm::Constant>(CombinedInfo.Sizes[I])) {
        // Constant expressions and global values are constants too, but they
        // are handled as runtime sizes.
        if (!isa<llvm::ConstantExpr>(CI) && !isa<llvm::GlobalValue>(CI)) {
          // For a non-contiguous entry the size slot carries the number of
          // dimensions rather than the computed size.
          if (IsNonContiguous && (CombinedInfo.Types[I] &
                                  MappableExprsHandler::OMP_MAP_NON_CONTIG))
            ConstSizes[I] = llvm::ConstantInt::get(
                CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]);
          else
            ConstSizes[I] = CI;
          continue;
        }
      }
      RuntimeSizes.set(I);
    }

    if (RuntimeSizes.all()) {
      // Every size is only known at run time: a plain temporary is enough, all
      // of its slots are written in the loop at the end of this block.
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // At least one size is constant: emit the constant sizes as a private
      // constant global.
      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage, SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      if (RuntimeSizes.any()) {
        // Mixed case: copy the constant global into a temporary so that the
        // runtime-sized slots can be overwritten in the loop below.
        QualType SizeArrayType = Ctx.getConstantArrayType(
            Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
            /*IndexTypeQuals=*/0);
        Address Buffer = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes");
        llvm::Value *GblConstPtr =
            CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                SizesArrayGbl, CGM.Int64Ty->getPointerTo());
        CGF.Builder.CreateMemCpy(
            Buffer,
            Address(GblConstPtr, CGM.Int64Ty,
                    CGM.getNaturalTypeAlignment(Ctx.getIntTypeForBitwidth(
                        /*DestWidth=*/64, /*Signed=*/false))),
            CGF.getTypeSize(SizeArrayType));
        Info.SizesArray = Buffer.getPointer();
      } else {
        // All sizes are constant: the global is used directly.
        Info.SizesArray = SizesArrayGbl;
      }
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The map names are only built if there is debug information requested;
    // otherwise a null i8* is recorded.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      // One name constant per entry of CombinedInfo.Exprs.
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      // Mapping is modified in place; it is not needed in its original form
      // after this point.
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      // Only emit the second global when it would differ from the first one.
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Store the per-entry values that have to be written at run time.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      // Base pointer: the slot address is cast to a pointer to the value's
      // own type before the store.
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr =
          Address::deprecated(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Remember the slot address for base pointers that carry a device
      // pointer declaration, when the caller asked for that information.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      // Pointer: same scheme as for the base pointer.
      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr =
          Address::deprecated(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Size: only the entries marked as runtime-sized need a store; the
      // others already have their value from the constant initializer.
      if (RuntimeSizes.test(I)) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr =
            Address::deprecated(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array: a null pointer unless the entry has a
      // user-defined mapper, in which case its function is stored and
      // Info.HasMapper is raised.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  // The non-contiguous descriptors are only needed when requested, when there
  // is descriptor data, and when something is mapped at all.
  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}
9780 
namespace {
/// Optional settings accepted by emitOffloadingArraysArgument.
struct ArgumentsOptions {
  /// True when the arguments are built for the call that ends a region.
  bool ForEndCall = false;

  ArgumentsOptions() = default;
  /// Deliberately implicit so that a plain bool can be passed as the options.
  ArgumentsOptions(bool IsEndCall) : ForEndCall(IsEndCall) {}
};
} // namespace
9789 
9790 /// Emit the arguments to be passed to the runtime library based on the
9791 /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
9792 /// ForEndCall, emit map types to be passed for the end of the region instead of
9793 /// the beginning.
9794 static void emitOffloadingArraysArgument(
9795     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
9796     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
9797     llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
9798     llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
9799     const ArgumentsOptions &Options = ArgumentsOptions()) {
9800   assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
9801          "expected region end call to runtime only when end call is separate");
9802   CodeGenModule &CGM = CGF.CGM;
9803   if (Info.NumberOfPtrs) {
9804     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9805         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9806         Info.BasePointersArray,
9807         /*Idx0=*/0, /*Idx1=*/0);
9808     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9809         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9810         Info.PointersArray,
9811         /*Idx0=*/0,
9812         /*Idx1=*/0);
9813     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9814         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9815         /*Idx0=*/0, /*Idx1=*/0);
9816     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9817         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9818         Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
9819                                                     : Info.MapTypesArray,
9820         /*Idx0=*/0,
9821         /*Idx1=*/0);
9822 
9823     // Only emit the mapper information arrays if debug information is
9824     // requested.
9825     if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9826       MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9827     else
9828       MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9829           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9830           Info.MapNamesArray,
9831           /*Idx0=*/0,
9832           /*Idx1=*/0);
9833     // If there is no user-defined mapper, set the mapper array to nullptr to
9834     // avoid an unnecessary data privatization
9835     if (!Info.HasMapper)
9836       MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9837     else
9838       MappersArrayArg =
9839           CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
9840   } else {
9841     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9842     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9843     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9844     MapTypesArrayArg =
9845         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9846     MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9847     MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9848   }
9849 }
9850 
9851 /// Check for inner distribute directive.
9852 static const OMPExecutableDirective *
9853 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9854   const auto *CS = D.getInnermostCapturedStmt();
9855   const auto *Body =
9856       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9857   const Stmt *ChildStmt =
9858       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9859 
9860   if (const auto *NestedDir =
9861           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9862     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9863     switch (D.getDirectiveKind()) {
9864     case OMPD_target:
9865       if (isOpenMPDistributeDirective(DKind))
9866         return NestedDir;
9867       if (DKind == OMPD_teams) {
9868         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9869             /*IgnoreCaptured=*/true);
9870         if (!Body)
9871           return nullptr;
9872         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9873         if (const auto *NND =
9874                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9875           DKind = NND->getDirectiveKind();
9876           if (isOpenMPDistributeDirective(DKind))
9877             return NND;
9878         }
9879       }
9880       return nullptr;
9881     case OMPD_target_teams:
9882       if (isOpenMPDistributeDirective(DKind))
9883         return NestedDir;
9884       return nullptr;
9885     case OMPD_target_parallel:
9886     case OMPD_target_simd:
9887     case OMPD_target_parallel_for:
9888     case OMPD_target_parallel_for_simd:
9889       return nullptr;
9890     case OMPD_target_teams_distribute:
9891     case OMPD_target_teams_distribute_simd:
9892     case OMPD_target_teams_distribute_parallel_for:
9893     case OMPD_target_teams_distribute_parallel_for_simd:
9894     case OMPD_parallel:
9895     case OMPD_for:
9896     case OMPD_parallel_for:
9897     case OMPD_parallel_master:
9898     case OMPD_parallel_sections:
9899     case OMPD_for_simd:
9900     case OMPD_parallel_for_simd:
9901     case OMPD_cancel:
9902     case OMPD_cancellation_point:
9903     case OMPD_ordered:
9904     case OMPD_threadprivate:
9905     case OMPD_allocate:
9906     case OMPD_task:
9907     case OMPD_simd:
9908     case OMPD_tile:
9909     case OMPD_unroll:
9910     case OMPD_sections:
9911     case OMPD_section:
9912     case OMPD_single:
9913     case OMPD_master:
9914     case OMPD_critical:
9915     case OMPD_taskyield:
9916     case OMPD_barrier:
9917     case OMPD_taskwait:
9918     case OMPD_taskgroup:
9919     case OMPD_atomic:
9920     case OMPD_flush:
9921     case OMPD_depobj:
9922     case OMPD_scan:
9923     case OMPD_teams:
9924     case OMPD_target_data:
9925     case OMPD_target_exit_data:
9926     case OMPD_target_enter_data:
9927     case OMPD_distribute:
9928     case OMPD_distribute_simd:
9929     case OMPD_distribute_parallel_for:
9930     case OMPD_distribute_parallel_for_simd:
9931     case OMPD_teams_distribute:
9932     case OMPD_teams_distribute_simd:
9933     case OMPD_teams_distribute_parallel_for:
9934     case OMPD_teams_distribute_parallel_for_simd:
9935     case OMPD_target_update:
9936     case OMPD_declare_simd:
9937     case OMPD_declare_variant:
9938     case OMPD_begin_declare_variant:
9939     case OMPD_end_declare_variant:
9940     case OMPD_declare_target:
9941     case OMPD_end_declare_target:
9942     case OMPD_declare_reduction:
9943     case OMPD_declare_mapper:
9944     case OMPD_taskloop:
9945     case OMPD_taskloop_simd:
9946     case OMPD_master_taskloop:
9947     case OMPD_master_taskloop_simd:
9948     case OMPD_parallel_master_taskloop:
9949     case OMPD_parallel_master_taskloop_simd:
9950     case OMPD_requires:
9951     case OMPD_metadirective:
9952     case OMPD_unknown:
9953     default:
9954       llvm_unreachable("Unexpected directive.");
9955     }
9956   }
9957 
9958   return nullptr;
9959 }
9960 
9961 /// Emit the user-defined mapper function. The code generation follows the
9962 /// pattern in the example below.
9963 /// \code
9964 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9965 ///                                           void *base, void *begin,
9966 ///                                           int64_t size, int64_t type,
9967 ///                                           void *name = nullptr) {
9968 ///   // Allocate space for an array section first or add a base/begin for
9969 ///   // pointer dereference.
9970 ///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9971 ///       !maptype.IsDelete)
9972 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9973 ///                                 size*sizeof(Ty), clearToFromMember(type));
9974 ///   // Map members.
9975 ///   for (unsigned i = 0; i < size; i++) {
9976 ///     // For each component specified by this mapper:
9977 ///     for (auto c : begin[i]->all_components) {
9978 ///       if (c.hasMapper())
9979 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9980 ///                       c.arg_type, c.arg_name);
9981 ///       else
9982 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9983 ///                                     c.arg_begin, c.arg_size, c.arg_type,
9984 ///                                     c.arg_name);
9985 ///     }
9986 ///   }
9987 ///   // Delete the array section.
9988 ///   if (size > 1 && maptype.IsDelete)
9989 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9990 ///                                 size*sizeof(Ty), clearToFromMember(type));
9991 /// }
9992 /// \endcode
9993 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9994                                             CodeGenFunction *CGF) {
9995   if (UDMMap.count(D) > 0)
9996     return;
9997   ASTContext &C = CGM.getContext();
9998   QualType Ty = D->getType();
9999   QualType PtrTy = C.getPointerType(Ty).withRestrict();
10000   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
10001   auto *MapperVarDecl =
10002       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
10003   SourceLocation Loc = D->getLocation();
10004   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
10005   llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
10006 
10007   // Prepare mapper function arguments and attributes.
10008   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
10009                               C.VoidPtrTy, ImplicitParamDecl::Other);
10010   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
10011                             ImplicitParamDecl::Other);
10012   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
10013                              C.VoidPtrTy, ImplicitParamDecl::Other);
10014   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
10015                             ImplicitParamDecl::Other);
10016   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
10017                             ImplicitParamDecl::Other);
10018   ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
10019                             ImplicitParamDecl::Other);
10020   FunctionArgList Args;
10021   Args.push_back(&HandleArg);
10022   Args.push_back(&BaseArg);
10023   Args.push_back(&BeginArg);
10024   Args.push_back(&SizeArg);
10025   Args.push_back(&TypeArg);
10026   Args.push_back(&NameArg);
10027   const CGFunctionInfo &FnInfo =
10028       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
10029   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
10030   SmallString<64> TyStr;
10031   llvm::raw_svector_ostream Out(TyStr);
10032   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
10033   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
10034   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
10035                                     Name, &CGM.getModule());
10036   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
10037   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
10038   // Start the mapper function code generation.
10039   CodeGenFunction MapperCGF(CGM);
10040   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
10041   // Compute the starting and end addresses of array elements.
10042   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
10043       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
10044       C.getPointerType(Int64Ty), Loc);
10045   // Prepare common arguments for array initiation and deletion.
10046   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
10047       MapperCGF.GetAddrOfLocalVar(&HandleArg),
10048       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
10049   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
10050       MapperCGF.GetAddrOfLocalVar(&BaseArg),
10051       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
10052   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
10053       MapperCGF.GetAddrOfLocalVar(&BeginArg),
10054       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
10055   // Convert the size in bytes into the number of array elements.
10056   Size = MapperCGF.Builder.CreateExactUDiv(
10057       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
10058   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
10059       BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
10060   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
10061   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
10062       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
10063       C.getPointerType(Int64Ty), Loc);
10064   llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
10065       MapperCGF.GetAddrOfLocalVar(&NameArg),
10066       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
10067 
10068   // Emit array initiation if this is an array section and \p MapType indicates
10069   // that memory allocation is required.
10070   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
10071   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
10072                              MapName, ElementSize, HeadBB, /*IsInit=*/true);
10073 
10074   // Emit a for loop to iterate through SizeArg of elements and map all of them.
10075 
10076   // Emit the loop header block.
10077   MapperCGF.EmitBlock(HeadBB);
10078   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
10079   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
10080   // Evaluate whether the initial condition is satisfied.
10081   llvm::Value *IsEmpty =
10082       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
10083   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
10084   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
10085 
10086   // Emit the loop body block.
10087   MapperCGF.EmitBlock(BodyBB);
10088   llvm::BasicBlock *LastBB = BodyBB;
10089   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
10090       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
10091   PtrPHI->addIncoming(PtrBegin, EntryBB);
10092   Address PtrCurrent(PtrPHI, ElemTy,
10093                      MapperCGF.GetAddrOfLocalVar(&BeginArg)
10094                          .getAlignment()
10095                          .alignmentOfArrayElement(ElementSize));
10096   // Privatize the declared variable of mapper to be the current array element.
10097   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
10098   Scope.addPrivate(MapperVarDecl, PtrCurrent);
10099   (void)Scope.Privatize();
10100 
10101   // Get map clause information. Fill up the arrays with all mapped variables.
10102   MappableExprsHandler::MapCombinedInfoTy Info;
10103   MappableExprsHandler MEHandler(*D, MapperCGF);
10104   MEHandler.generateAllInfoForMapper(Info);
10105 
10106   // Call the runtime API __tgt_mapper_num_components to get the number of
10107   // pre-existing components.
10108   llvm::Value *OffloadingArgs[] = {Handle};
10109   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
10110       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
10111                                             OMPRTL___tgt_mapper_num_components),
10112       OffloadingArgs);
10113   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
10114       PreviousSize,
10115       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
10116 
10117   // Fill up the runtime mapper handle for all components.
10118   for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
10119     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
10120         *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
10121     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
10122         Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
10123     llvm::Value *CurSizeArg = Info.Sizes[I];
10124     llvm::Value *CurNameArg =
10125         (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
10126             ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
10127             : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
10128 
10129     // Extract the MEMBER_OF field from the map type.
10130     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
10131     llvm::Value *MemberMapType =
10132         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
10133 
10134     // Combine the map type inherited from user-defined mapper with that
10135     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
10136     // bits of the \a MapType, which is the input argument of the mapper
10137     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
10138     // bits of MemberMapType.
10139     // [OpenMP 5.0], 1.2.6. map-type decay.
10140     //        | alloc |  to   | from  | tofrom | release | delete
10141     // ----------------------------------------------------------
10142     // alloc  | alloc | alloc | alloc | alloc  | release | delete
10143     // to     | alloc |  to   | alloc |   to   | release | delete
10144     // from   | alloc | alloc | from  |  from  | release | delete
10145     // tofrom | alloc |  to   | from  | tofrom | release | delete
10146     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
10147         MapType,
10148         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
10149                                    MappableExprsHandler::OMP_MAP_FROM));
10150     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
10151     llvm::BasicBlock *AllocElseBB =
10152         MapperCGF.createBasicBlock("omp.type.alloc.else");
10153     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
10154     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
10155     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
10156     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
10157     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
10158     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
10159     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
10160     MapperCGF.EmitBlock(AllocBB);
10161     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
10162         MemberMapType,
10163         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
10164                                      MappableExprsHandler::OMP_MAP_FROM)));
10165     MapperCGF.Builder.CreateBr(EndBB);
10166     MapperCGF.EmitBlock(AllocElseBB);
10167     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
10168         LeftToFrom,
10169         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
10170     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
10171     // In case of to, clear OMP_MAP_FROM.
10172     MapperCGF.EmitBlock(ToBB);
10173     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
10174         MemberMapType,
10175         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
10176     MapperCGF.Builder.CreateBr(EndBB);
10177     MapperCGF.EmitBlock(ToElseBB);
10178     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
10179         LeftToFrom,
10180         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
10181     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
10182     // In case of from, clear OMP_MAP_TO.
10183     MapperCGF.EmitBlock(FromBB);
10184     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
10185         MemberMapType,
10186         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
10187     // In case of tofrom, do nothing.
10188     MapperCGF.EmitBlock(EndBB);
10189     LastBB = EndBB;
10190     llvm::PHINode *CurMapType =
10191         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
10192     CurMapType->addIncoming(AllocMapType, AllocBB);
10193     CurMapType->addIncoming(ToMapType, ToBB);
10194     CurMapType->addIncoming(FromMapType, FromBB);
10195     CurMapType->addIncoming(MemberMapType, ToElseBB);
10196 
10197     llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
10198                                      CurSizeArg, CurMapType, CurNameArg};
10199     if (Info.Mappers[I]) {
10200       // Call the corresponding mapper function.
10201       llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
10202           cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
10203       assert(MapperFunc && "Expect a valid mapper function is available.");
10204       MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
10205     } else {
10206       // Call the runtime API __tgt_push_mapper_component to fill up the runtime
10207       // data structure.
10208       MapperCGF.EmitRuntimeCall(
10209           OMPBuilder.getOrCreateRuntimeFunction(
10210               CGM.getModule(), OMPRTL___tgt_push_mapper_component),
10211           OffloadingArgs);
10212     }
10213   }
10214 
10215   // Update the pointer to point to the next element that needs to be mapped,
10216   // and check whether we have mapped all elements.
10217   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
10218       ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
10219   PtrPHI->addIncoming(PtrNext, LastBB);
10220   llvm::Value *IsDone =
10221       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
10222   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
10223   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
10224 
10225   MapperCGF.EmitBlock(ExitBB);
10226   // Emit array deletion if this is an array section and \p MapType indicates
10227   // that deletion is required.
10228   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
10229                              MapName, ElementSize, DoneBB, /*IsInit=*/false);
10230 
10231   // Emit the function exit block.
10232   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
10233   MapperCGF.FinishFunction();
10234   UDMMap.try_emplace(D, Fn);
10235   if (CGF) {
10236     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
10237     Decls.second.push_back(D);
10238   }
10239 }
10240 
10241 /// Emit the array initialization or deletion portion for user-defined mapper
10242 /// code generation. First, it evaluates whether an array section is mapped and
10243 /// whether the \a MapType instructs to delete this section. If \a IsInit is
10244 /// true, and \a MapType indicates to not delete this array, array
10245 /// initialization code is generated. If \a IsInit is false, and \a MapType
10246 /// indicates to not this array, array deletion code is generated.
10247 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
10248     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
10249     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
10250     llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
10251     bool IsInit) {
10252   StringRef Prefix = IsInit ? ".init" : ".del";
10253 
10254   // Evaluate if this is an array section.
10255   llvm::BasicBlock *BodyBB =
10256       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
10257   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
10258       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
10259   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
10260       MapType,
10261       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
10262   llvm::Value *DeleteCond;
10263   llvm::Value *Cond;
10264   if (IsInit) {
10265     // base != begin?
10266     llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
10267     // IsPtrAndObj?
10268     llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
10269         MapType,
10270         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
10271     PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
10272     BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
10273     Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
10274     DeleteCond = MapperCGF.Builder.CreateIsNull(
10275         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
10276   } else {
10277     Cond = IsArray;
10278     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
10279         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
10280   }
10281   Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
10282   MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
10283 
10284   MapperCGF.EmitBlock(BodyBB);
10285   // Get the array size by multiplying element size and element number (i.e., \p
10286   // Size).
10287   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
10288       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
10289   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
10290   // memory allocation/deletion purpose only.
10291   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
10292       MapType,
10293       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
10294                                    MappableExprsHandler::OMP_MAP_FROM)));
10295   MapTypeArg = MapperCGF.Builder.CreateOr(
10296       MapTypeArg,
10297       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));
10298 
10299   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
10300   // data structure.
10301   llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
10302                                    ArraySize, MapTypeArg, MapName};
10303   MapperCGF.EmitRuntimeCall(
10304       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
10305                                             OMPRTL___tgt_push_mapper_component),
10306       OffloadingArgs);
10307 }
10308 
10309 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10310     const OMPDeclareMapperDecl *D) {
10311   auto I = UDMMap.find(D);
10312   if (I != UDMMap.end())
10313     return I->second;
10314   emitUserDefinedMapper(D);
10315   return UDMMap.lookup(D);
10316 }
10317 
10318 void CGOpenMPRuntime::emitTargetNumIterationsCall(
10319     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10320     llvm::Value *DeviceID,
10321     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10322                                      const OMPLoopDirective &D)>
10323         SizeEmitter) {
10324   OpenMPDirectiveKind Kind = D.getDirectiveKind();
10325   const OMPExecutableDirective *TD = &D;
10326   // Get nested teams distribute kind directive, if any.
10327   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
10328     TD = getNestedDistributeDirective(CGM.getContext(), D);
10329   if (!TD)
10330     return;
10331   const auto *LD = cast<OMPLoopDirective>(TD);
10332   auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
10333                                                          PrePostActionTy &) {
10334     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
10335       llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10336       llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
10337       CGF.EmitRuntimeCall(
10338           OMPBuilder.getOrCreateRuntimeFunction(
10339               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
10340           Args);
10341     }
10342   };
10343   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
10344 }
10345 
10346 void CGOpenMPRuntime::emitTargetCall(
10347     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10348     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
10349     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10350     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10351                                      const OMPLoopDirective &D)>
10352         SizeEmitter) {
10353   if (!CGF.HaveInsertPoint())
10354     return;
10355 
10356   const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsDevice &&
10357                                    CGM.getLangOpts().OpenMPOffloadMandatory;
10358 
10359   assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
10360 
10361   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10362                                  D.hasClausesOfKind<OMPNowaitClause>();
10363   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
10364   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
10365   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
10366                                             PrePostActionTy &) {
10367     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10368   };
10369   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
10370 
10371   CodeGenFunction::OMPTargetDataInfo InputInfo;
10372   llvm::Value *MapTypesArray = nullptr;
10373   llvm::Value *MapNamesArray = nullptr;
10374   // Generate code for the host fallback function.
10375   auto &&FallbackGen = [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask,
10376                         &CS, OffloadingMandatory](CodeGenFunction &CGF) {
10377     if (OffloadingMandatory) {
10378       CGF.Builder.CreateUnreachable();
10379     } else {
10380       if (RequiresOuterTask) {
10381         CapturedVars.clear();
10382         CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10383       }
10384       emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10385     }
10386   };
10387   // Fill up the pointer arrays and transfer execution to the device.
10388   auto &&ThenGen = [this, Device, OutlinedFnID, &D, &InputInfo, &MapTypesArray,
10389                     &MapNamesArray, SizeEmitter,
10390                     FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
10391     if (Device.getInt() == OMPC_DEVICE_ancestor) {
10392       // Reverse offloading is not supported, so just execute on the host.
10393       FallbackGen(CGF);
10394       return;
10395     }
10396 
10397     // On top of the arrays that were filled up, the target offloading call
10398     // takes as arguments the device id as well as the host pointer. The host
10399     // pointer is used by the runtime library to identify the current target
10400     // region, so it only has to be unique and not necessarily point to
10401     // anything. It could be the pointer to the outlined function that
10402     // implements the target region, but we aren't using that so that the
10403     // compiler doesn't need to keep that, and could therefore inline the host
10404     // function if proven worthwhile during optimization.
10405 
10406     // From this point on, we need to have an ID of the target region defined.
10407     assert(OutlinedFnID && "Invalid outlined function ID!");
10408     (void)OutlinedFnID;
10409 
10410     // Emit device ID if any.
10411     llvm::Value *DeviceID;
10412     if (Device.getPointer()) {
10413       assert((Device.getInt() == OMPC_DEVICE_unknown ||
10414               Device.getInt() == OMPC_DEVICE_device_num) &&
10415              "Expected device_num modifier.");
10416       llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10417       DeviceID =
10418           CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10419     } else {
10420       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10421     }
10422 
10423     // Emit the number of elements in the offloading arrays.
10424     llvm::Value *PointerNum =
10425         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10426 
10427     // Return value of the runtime offloading call.
10428     llvm::Value *Return;
10429 
10430     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
10431     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
10432 
10433     // Source location for the ident struct
10434     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10435 
10436     // Emit tripcount for the target loop-based directive.
10437     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
10438 
10439     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10440     // The target region is an outlined function launched by the runtime
10441     // via calls __tgt_target() or __tgt_target_teams().
10442     //
10443     // __tgt_target() launches a target region with one team and one thread,
10444     // executing a serial region.  This master thread may in turn launch
10445     // more threads within its team upon encountering a parallel region,
10446     // however, no additional teams can be launched on the device.
10447     //
10448     // __tgt_target_teams() launches a target region with one or more teams,
10449     // each with one or more threads.  This call is required for target
10450     // constructs such as:
10451     //  'target teams'
10452     //  'target' / 'teams'
10453     //  'target teams distribute parallel for'
10454     //  'target parallel'
10455     // and so on.
10456     //
10457     // Note that on the host and CPU targets, the runtime implementation of
10458     // these calls simply call the outlined function without forking threads.
10459     // The outlined functions themselves have runtime calls to
10460     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
10461     // the compiler in emitTeamsCall() and emitParallelCall().
10462     //
10463     // In contrast, on the NVPTX target, the implementation of
10464     // __tgt_target_teams() launches a GPU kernel with the requested number
10465     // of teams and threads so no additional calls to the runtime are required.
10466     if (NumTeams) {
10467       // If we have NumTeams defined this means that we have an enclosed teams
10468       // region. Therefore we also expect to have NumThreads defined. These two
10469       // values should be defined in the presence of a teams directive,
10470       // regardless of having any clauses associated. If the user is using teams
10471       // but no clauses, these two values will be the default that should be
10472       // passed to the runtime library - a 32-bit integer with the value zero.
10473       assert(NumThreads && "Thread limit expression should be available along "
10474                            "with number of teams.");
10475       SmallVector<llvm::Value *> OffloadingArgs = {
10476           RTLoc,
10477           DeviceID,
10478           OutlinedFnID,
10479           PointerNum,
10480           InputInfo.BasePointersArray.getPointer(),
10481           InputInfo.PointersArray.getPointer(),
10482           InputInfo.SizesArray.getPointer(),
10483           MapTypesArray,
10484           MapNamesArray,
10485           InputInfo.MappersArray.getPointer(),
10486           NumTeams,
10487           NumThreads};
10488       if (HasNowait) {
10489         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10490         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10491         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10492         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10493         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10494         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10495       }
10496       Return = CGF.EmitRuntimeCall(
10497           OMPBuilder.getOrCreateRuntimeFunction(
10498               CGM.getModule(), HasNowait
10499                                    ? OMPRTL___tgt_target_teams_nowait_mapper
10500                                    : OMPRTL___tgt_target_teams_mapper),
10501           OffloadingArgs);
10502     } else {
10503       SmallVector<llvm::Value *> OffloadingArgs = {
10504           RTLoc,
10505           DeviceID,
10506           OutlinedFnID,
10507           PointerNum,
10508           InputInfo.BasePointersArray.getPointer(),
10509           InputInfo.PointersArray.getPointer(),
10510           InputInfo.SizesArray.getPointer(),
10511           MapTypesArray,
10512           MapNamesArray,
10513           InputInfo.MappersArray.getPointer()};
10514       if (HasNowait) {
10515         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10516         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10517         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10518         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10519         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10520         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10521       }
10522       Return = CGF.EmitRuntimeCall(
10523           OMPBuilder.getOrCreateRuntimeFunction(
10524               CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
10525                                          : OMPRTL___tgt_target_mapper),
10526           OffloadingArgs);
10527     }
10528 
10529     // Check the error code and execute the host version if required.
10530     llvm::BasicBlock *OffloadFailedBlock =
10531         CGF.createBasicBlock("omp_offload.failed");
10532     llvm::BasicBlock *OffloadContBlock =
10533         CGF.createBasicBlock("omp_offload.cont");
10534     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
10535     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
10536 
10537     CGF.EmitBlock(OffloadFailedBlock);
10538     FallbackGen(CGF);
10539 
10540     CGF.EmitBranch(OffloadContBlock);
10541 
10542     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
10543   };
10544 
10545   // Notify that the host version must be executed.
10546   auto &&ElseGen = [FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
10547     FallbackGen(CGF);
10548   };
10549 
10550   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10551                           &MapNamesArray, &CapturedVars, RequiresOuterTask,
10552                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
10553     // Fill up the arrays with all the captured variables.
10554     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10555 
10556     // Get mappable expression information.
10557     MappableExprsHandler MEHandler(D, CGF);
10558     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10559     llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10560 
10561     auto RI = CS.getCapturedRecordDecl()->field_begin();
10562     auto *CV = CapturedVars.begin();
10563     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10564                                               CE = CS.capture_end();
10565          CI != CE; ++CI, ++RI, ++CV) {
10566       MappableExprsHandler::MapCombinedInfoTy CurInfo;
10567       MappableExprsHandler::StructRangeInfoTy PartialStruct;
10568 
10569       // VLA sizes are passed to the outlined region by copy and do not have map
10570       // information associated.
10571       if (CI->capturesVariableArrayType()) {
10572         CurInfo.Exprs.push_back(nullptr);
10573         CurInfo.BasePointers.push_back(*CV);
10574         CurInfo.Pointers.push_back(*CV);
10575         CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10576             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10577         // Copy to the device as an argument. No need to retrieve it.
10578         CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
10579                                 MappableExprsHandler::OMP_MAP_TARGET_PARAM |
10580                                 MappableExprsHandler::OMP_MAP_IMPLICIT);
10581         CurInfo.Mappers.push_back(nullptr);
10582       } else {
10583         // If we have any information in the map clause, we use it, otherwise we
10584         // just do a default mapping.
10585         MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
10586         if (!CI->capturesThis())
10587           MappedVarSet.insert(CI->getCapturedVar());
10588         else
10589           MappedVarSet.insert(nullptr);
10590         if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
10591           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10592         // Generate correct mapping for variables captured by reference in
10593         // lambdas.
10594         if (CI->capturesVariable())
10595           MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10596                                                   CurInfo, LambdaPointers);
10597       }
10598       // We expect to have at least an element of information for this capture.
10599       assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
10600              "Non-existing map pointer for capture!");
10601       assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10602              CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10603              CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10604              CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10605              "Inconsistent map information sizes!");
10606 
10607       // If there is an entry in PartialStruct it means we have a struct with
10608       // individual members mapped. Emit an extra combined entry.
10609       if (PartialStruct.Base.isValid()) {
10610         CombinedInfo.append(PartialStruct.PreliminaryMapData);
10611         MEHandler.emitCombinedEntry(
10612             CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
10613             !PartialStruct.PreliminaryMapData.BasePointers.empty());
10614       }
10615 
10616       // We need to append the results of this capture to what we already have.
10617       CombinedInfo.append(CurInfo);
10618     }
10619     // Adjust MEMBER_OF flags for the lambdas captures.
10620     MEHandler.adjustMemberOfForLambdaCaptures(
10621         LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
10622         CombinedInfo.Types);
10623     // Map any list items in a map clause that were not captures because they
10624     // weren't referenced within the construct.
10625     MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
10626 
10627     TargetDataInfo Info;
10628     // Fill up the arrays and create the arguments.
10629     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
10630     emitOffloadingArraysArgument(
10631         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
10632         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
10633         {/*ForEndCall=*/false});
10634 
10635     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10636     InputInfo.BasePointersArray =
10637         Address::deprecated(Info.BasePointersArray, CGM.getPointerAlign());
10638     InputInfo.PointersArray =
10639         Address::deprecated(Info.PointersArray, CGM.getPointerAlign());
10640     InputInfo.SizesArray =
10641         Address::deprecated(Info.SizesArray, CGM.getPointerAlign());
10642     InputInfo.MappersArray =
10643         Address::deprecated(Info.MappersArray, CGM.getPointerAlign());
10644     MapTypesArray = Info.MapTypesArray;
10645     MapNamesArray = Info.MapNamesArray;
10646     if (RequiresOuterTask)
10647       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10648     else
10649       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10650   };
10651 
10652   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10653                              CodeGenFunction &CGF, PrePostActionTy &) {
10654     if (RequiresOuterTask) {
10655       CodeGenFunction::OMPTargetDataInfo InputInfo;
10656       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10657     } else {
10658       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10659     }
10660   };
10661 
10662   // If we have a target function ID it means that we need to support
10663   // offloading, otherwise, just execute on the host. We need to execute on host
10664   // regardless of the conditional in the if clause if, e.g., the user do not
10665   // specify target triples.
10666   if (OutlinedFnID) {
10667     if (IfCond) {
10668       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10669     } else {
10670       RegionCodeGenTy ThenRCG(TargetThenGen);
10671       ThenRCG(CGF);
10672     }
10673   } else {
10674     RegionCodeGenTy ElseRCG(TargetElseGen);
10675     ElseRCG(CGF);
10676   }
10677 }
10678 
10679 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10680                                                     StringRef ParentName) {
10681   if (!S)
10682     return;
10683 
10684   // Codegen OMP target directives that offload compute to the device.
10685   bool RequiresDeviceCodegen =
10686       isa<OMPExecutableDirective>(S) &&
10687       isOpenMPTargetExecutionDirective(
10688           cast<OMPExecutableDirective>(S)->getDirectiveKind());
10689 
10690   if (RequiresDeviceCodegen) {
10691     const auto &E = *cast<OMPExecutableDirective>(S);
10692     unsigned DeviceID;
10693     unsigned FileID;
10694     unsigned Line;
10695     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
10696                              FileID, Line);
10697 
10698     // Is this a target region that should not be emitted as an entry point? If
10699     // so just signal we are done with this target region.
10700     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
10701                                                             ParentName, Line))
10702       return;
10703 
10704     switch (E.getDirectiveKind()) {
10705     case OMPD_target:
10706       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
10707                                                    cast<OMPTargetDirective>(E));
10708       break;
10709     case OMPD_target_parallel:
10710       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
10711           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
10712       break;
10713     case OMPD_target_teams:
10714       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
10715           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
10716       break;
10717     case OMPD_target_teams_distribute:
10718       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
10719           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
10720       break;
10721     case OMPD_target_teams_distribute_simd:
10722       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
10723           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
10724       break;
10725     case OMPD_target_parallel_for:
10726       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
10727           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
10728       break;
10729     case OMPD_target_parallel_for_simd:
10730       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
10731           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
10732       break;
10733     case OMPD_target_simd:
10734       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
10735           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
10736       break;
10737     case OMPD_target_teams_distribute_parallel_for:
10738       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
10739           CGM, ParentName,
10740           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
10741       break;
10742     case OMPD_target_teams_distribute_parallel_for_simd:
10743       CodeGenFunction::
10744           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
10745               CGM, ParentName,
10746               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
10747       break;
10748     case OMPD_parallel:
10749     case OMPD_for:
10750     case OMPD_parallel_for:
10751     case OMPD_parallel_master:
10752     case OMPD_parallel_sections:
10753     case OMPD_for_simd:
10754     case OMPD_parallel_for_simd:
10755     case OMPD_cancel:
10756     case OMPD_cancellation_point:
10757     case OMPD_ordered:
10758     case OMPD_threadprivate:
10759     case OMPD_allocate:
10760     case OMPD_task:
10761     case OMPD_simd:
10762     case OMPD_tile:
10763     case OMPD_unroll:
10764     case OMPD_sections:
10765     case OMPD_section:
10766     case OMPD_single:
10767     case OMPD_master:
10768     case OMPD_critical:
10769     case OMPD_taskyield:
10770     case OMPD_barrier:
10771     case OMPD_taskwait:
10772     case OMPD_taskgroup:
10773     case OMPD_atomic:
10774     case OMPD_flush:
10775     case OMPD_depobj:
10776     case OMPD_scan:
10777     case OMPD_teams:
10778     case OMPD_target_data:
10779     case OMPD_target_exit_data:
10780     case OMPD_target_enter_data:
10781     case OMPD_distribute:
10782     case OMPD_distribute_simd:
10783     case OMPD_distribute_parallel_for:
10784     case OMPD_distribute_parallel_for_simd:
10785     case OMPD_teams_distribute:
10786     case OMPD_teams_distribute_simd:
10787     case OMPD_teams_distribute_parallel_for:
10788     case OMPD_teams_distribute_parallel_for_simd:
10789     case OMPD_target_update:
10790     case OMPD_declare_simd:
10791     case OMPD_declare_variant:
10792     case OMPD_begin_declare_variant:
10793     case OMPD_end_declare_variant:
10794     case OMPD_declare_target:
10795     case OMPD_end_declare_target:
10796     case OMPD_declare_reduction:
10797     case OMPD_declare_mapper:
10798     case OMPD_taskloop:
10799     case OMPD_taskloop_simd:
10800     case OMPD_master_taskloop:
10801     case OMPD_master_taskloop_simd:
10802     case OMPD_parallel_master_taskloop:
10803     case OMPD_parallel_master_taskloop_simd:
10804     case OMPD_requires:
10805     case OMPD_metadirective:
10806     case OMPD_unknown:
10807     default:
10808       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10809     }
10810     return;
10811   }
10812 
10813   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10814     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10815       return;
10816 
10817     scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10818     return;
10819   }
10820 
10821   // If this is a lambda function, look into its body.
10822   if (const auto *L = dyn_cast<LambdaExpr>(S))
10823     S = L->getBody();
10824 
10825   // Keep looking for target regions recursively.
10826   for (const Stmt *II : S->children())
10827     scanForTargetRegionsFunctions(II, ParentName);
10828 }
10829 
10830 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10831   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10832       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10833   if (!DevTy)
10834     return false;
10835   // Do not emit device_type(nohost) functions for the host.
10836   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10837     return true;
10838   // Do not emit device_type(host) functions for the device.
10839   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10840     return true;
10841   return false;
10842 }
10843 
10844 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10845   // If emitting code for the host, we do not process FD here. Instead we do
10846   // the normal code generation.
10847   if (!CGM.getLangOpts().OpenMPIsDevice) {
10848     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10849       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10850                                   CGM.getLangOpts().OpenMPIsDevice))
10851         return true;
10852     return false;
10853   }
10854 
10855   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10856   // Try to detect target regions in the function.
10857   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10858     StringRef Name = CGM.getMangledName(GD);
10859     scanForTargetRegionsFunctions(FD->getBody(), Name);
10860     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10861                                 CGM.getLangOpts().OpenMPIsDevice))
10862       return true;
10863   }
10864 
10865   // Do not to emit function if it is not marked as declare target.
10866   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10867          AlreadyEmittedTargetDecls.count(VD) == 0;
10868 }
10869 
10870 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10871   if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10872                               CGM.getLangOpts().OpenMPIsDevice))
10873     return true;
10874 
10875   if (!CGM.getLangOpts().OpenMPIsDevice)
10876     return false;
10877 
10878   // Check if there are Ctors/Dtors in this declaration and look for target
10879   // regions in it. We use the complete variant to produce the kernel name
10880   // mangling.
10881   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10882   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10883     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10884       StringRef ParentName =
10885           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10886       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10887     }
10888     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10889       StringRef ParentName =
10890           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10891       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10892     }
10893   }
10894 
10895   // Do not to emit variable if it is not marked as declare target.
10896   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10897       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10898           cast<VarDecl>(GD.getDecl()));
10899   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10900       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10901        HasRequiresUnifiedSharedMemory)) {
10902     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10903     return true;
10904   }
10905   return false;
10906 }
10907 
10908 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10909                                                    llvm::Constant *Addr) {
10910   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10911       !CGM.getLangOpts().OpenMPIsDevice)
10912     return;
10913 
10914   // If we have host/nohost variables, they do not need to be registered.
10915   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10916       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10917   if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
10918     return;
10919 
10920   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10921       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10922   if (!Res) {
10923     if (CGM.getLangOpts().OpenMPIsDevice) {
10924       // Register non-target variables being emitted in device code (debug info
10925       // may cause this).
10926       StringRef VarName = CGM.getMangledName(VD);
10927       EmittedNonTargetVariables.try_emplace(VarName, Addr);
10928     }
10929     return;
10930   }
10931   // Register declare target variables.
10932   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10933   StringRef VarName;
10934   CharUnits VarSize;
10935   llvm::GlobalValue::LinkageTypes Linkage;
10936 
10937   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10938       !HasRequiresUnifiedSharedMemory) {
10939     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10940     VarName = CGM.getMangledName(VD);
10941     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10942       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10943       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10944     } else {
10945       VarSize = CharUnits::Zero();
10946     }
10947     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10948     // Temp solution to prevent optimizations of the internal variables.
10949     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10950       // Do not create a "ref-variable" if the original is not also available
10951       // on the host.
10952       if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
10953         return;
10954       std::string RefName = getName({VarName, "ref"});
10955       if (!CGM.GetGlobalValue(RefName)) {
10956         llvm::Constant *AddrRef =
10957             getOrCreateInternalVariable(Addr->getType(), RefName);
10958         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10959         GVAddrRef->setConstant(/*Val=*/true);
10960         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10961         GVAddrRef->setInitializer(Addr);
10962         CGM.addCompilerUsedGlobal(GVAddrRef);
10963       }
10964     }
10965   } else {
10966     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10967             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10968              HasRequiresUnifiedSharedMemory)) &&
10969            "Declare target attribute must link or to with unified memory.");
10970     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10971       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10972     else
10973       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10974 
10975     if (CGM.getLangOpts().OpenMPIsDevice) {
10976       VarName = Addr->getName();
10977       Addr = nullptr;
10978     } else {
10979       VarName = getAddrOfDeclareTargetVar(VD).getName();
10980       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10981     }
10982     VarSize = CGM.getPointerSize();
10983     Linkage = llvm::GlobalValue::WeakAnyLinkage;
10984   }
10985 
10986   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10987       VarName, Addr, VarSize, Flags, Linkage);
10988 }
10989 
10990 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10991   if (isa<FunctionDecl>(GD.getDecl()) ||
10992       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10993     return emitTargetFunctions(GD);
10994 
10995   return emitTargetGlobalVariable(GD);
10996 }
10997 
10998 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10999   for (const VarDecl *VD : DeferredGlobalVariables) {
11000     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
11001         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
11002     if (!Res)
11003       continue;
11004     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
11005         !HasRequiresUnifiedSharedMemory) {
11006       CGM.EmitGlobal(VD);
11007     } else {
11008       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
11009               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
11010                HasRequiresUnifiedSharedMemory)) &&
11011              "Expected link clause or to clause with unified memory.");
11012       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
11013     }
11014   }
11015 }
11016 
11017 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
11018     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
11019   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
11020          " Expected target-based directive.");
11021 }
11022 
11023 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
11024   for (const OMPClause *Clause : D->clauselists()) {
11025     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
11026       HasRequiresUnifiedSharedMemory = true;
11027     } else if (const auto *AC =
11028                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
11029       switch (AC->getAtomicDefaultMemOrderKind()) {
11030       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
11031         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
11032         break;
11033       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
11034         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
11035         break;
11036       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
11037         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
11038         break;
11039       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
11040         break;
11041       }
11042     }
11043   }
11044 }
11045 
11046 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
11047   return RequiresAtomicOrdering;
11048 }
11049 
11050 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
11051                                                        LangAS &AS) {
11052   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
11053     return false;
11054   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
11055   switch(A->getAllocatorType()) {
11056   case OMPAllocateDeclAttr::OMPNullMemAlloc:
11057   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
11058   // Not supported, fallback to the default mem space.
11059   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
11060   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
11061   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
11062   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
11063   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
11064   case OMPAllocateDeclAttr::OMPConstMemAlloc:
11065   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
11066     AS = LangAS::Default;
11067     return true;
11068   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
11069     llvm_unreachable("Expected predefined allocator for the variables with the "
11070                      "static storage.");
11071   }
11072   return false;
11073 }
11074 
11075 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
11076   return HasRequiresUnifiedSharedMemory;
11077 }
11078 
11079 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
11080     CodeGenModule &CGM)
11081     : CGM(CGM) {
11082   if (CGM.getLangOpts().OpenMPIsDevice) {
11083     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
11084     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
11085   }
11086 }
11087 
11088 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
11089   if (CGM.getLangOpts().OpenMPIsDevice)
11090     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
11091 }
11092 
11093 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
11094   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
11095     return true;
11096 
11097   const auto *D = cast<FunctionDecl>(GD.getDecl());
11098   // Do not to emit function if it is marked as declare target as it was already
11099   // emitted.
11100   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
11101     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
11102       if (auto *F = dyn_cast_or_null<llvm::Function>(
11103               CGM.GetGlobalValue(CGM.getMangledName(GD))))
11104         return !F->isDeclaration();
11105       return false;
11106     }
11107     return true;
11108   }
11109 
11110   return !AlreadyEmittedTargetDecls.insert(D).second;
11111 }
11112 
11113 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
11114   // If we don't have entries or if we are emitting code for the device, we
11115   // don't need to do anything.
11116   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
11117       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
11118       (OffloadEntriesInfoManager.empty() &&
11119        !HasEmittedDeclareTargetRegion &&
11120        !HasEmittedTargetRegion))
11121     return nullptr;
11122 
11123   // Create and register the function that handles the requires directives.
11124   ASTContext &C = CGM.getContext();
11125 
11126   llvm::Function *RequiresRegFn;
11127   {
11128     CodeGenFunction CGF(CGM);
11129     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
11130     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
11131     std::string ReqName = getName({"omp_offloading", "requires_reg"});
11132     RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
11133     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
11134     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
11135     // TODO: check for other requires clauses.
11136     // The requires directive takes effect only when a target region is
11137     // present in the compilation unit. Otherwise it is ignored and not
11138     // passed to the runtime. This avoids the runtime from throwing an error
11139     // for mismatching requires clauses across compilation units that don't
11140     // contain at least 1 target region.
11141     assert((HasEmittedTargetRegion ||
11142             HasEmittedDeclareTargetRegion ||
11143             !OffloadEntriesInfoManager.empty()) &&
11144            "Target or declare target region expected.");
11145     if (HasRequiresUnifiedSharedMemory)
11146       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
11147     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11148                             CGM.getModule(), OMPRTL___tgt_register_requires),
11149                         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
11150     CGF.FinishFunction();
11151   }
11152   return RequiresRegFn;
11153 }
11154 
11155 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
11156                                     const OMPExecutableDirective &D,
11157                                     SourceLocation Loc,
11158                                     llvm::Function *OutlinedFn,
11159                                     ArrayRef<llvm::Value *> CapturedVars) {
11160   if (!CGF.HaveInsertPoint())
11161     return;
11162 
11163   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11164   CodeGenFunction::RunCleanupsScope Scope(CGF);
11165 
11166   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11167   llvm::Value *Args[] = {
11168       RTLoc,
11169       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11170       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
11171   llvm::SmallVector<llvm::Value *, 16> RealArgs;
11172   RealArgs.append(std::begin(Args), std::end(Args));
11173   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11174 
11175   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11176       CGM.getModule(), OMPRTL___kmpc_fork_teams);
11177   CGF.EmitRuntimeCall(RTLFn, RealArgs);
11178 }
11179 
11180 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11181                                          const Expr *NumTeams,
11182                                          const Expr *ThreadLimit,
11183                                          SourceLocation Loc) {
11184   if (!CGF.HaveInsertPoint())
11185     return;
11186 
11187   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11188 
11189   llvm::Value *NumTeamsVal =
11190       NumTeams
11191           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
11192                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11193           : CGF.Builder.getInt32(0);
11194 
11195   llvm::Value *ThreadLimitVal =
11196       ThreadLimit
11197           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11198                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11199           : CGF.Builder.getInt32(0);
11200 
11201   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
11202   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
11203                                      ThreadLimitVal};
11204   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11205                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
11206                       PushNumTeamsArgs);
11207 }
11208 
11209 void CGOpenMPRuntime::emitTargetDataCalls(
11210     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11211     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
11212   if (!CGF.HaveInsertPoint())
11213     return;
11214 
11215   // Action used to replace the default codegen action and turn privatization
11216   // off.
11217   PrePostActionTy NoPrivAction;
11218 
11219   // Generate the code for the opening of the data environment. Capture all the
11220   // arguments of the runtime call by reference because they are used in the
11221   // closing of the region.
11222   auto &&BeginThenGen = [this, &D, Device, &Info,
11223                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
11224     // Fill up the arrays with all the mapped variables.
11225     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11226 
11227     // Get map clause information.
11228     MappableExprsHandler MEHandler(D, CGF);
11229     MEHandler.generateAllInfo(CombinedInfo);
11230 
11231     // Fill up the arrays and create the arguments.
11232     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11233                          /*IsNonContiguous=*/true);
11234 
11235     llvm::Value *BasePointersArrayArg = nullptr;
11236     llvm::Value *PointersArrayArg = nullptr;
11237     llvm::Value *SizesArrayArg = nullptr;
11238     llvm::Value *MapTypesArrayArg = nullptr;
11239     llvm::Value *MapNamesArrayArg = nullptr;
11240     llvm::Value *MappersArrayArg = nullptr;
11241     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11242                                  SizesArrayArg, MapTypesArrayArg,
11243                                  MapNamesArrayArg, MappersArrayArg, Info);
11244 
11245     // Emit device ID if any.
11246     llvm::Value *DeviceID = nullptr;
11247     if (Device) {
11248       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11249                                            CGF.Int64Ty, /*isSigned=*/true);
11250     } else {
11251       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11252     }
11253 
11254     // Emit the number of elements in the offloading arrays.
11255     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11256     //
11257     // Source location for the ident struct
11258     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11259 
11260     llvm::Value *OffloadingArgs[] = {RTLoc,
11261                                      DeviceID,
11262                                      PointerNum,
11263                                      BasePointersArrayArg,
11264                                      PointersArrayArg,
11265                                      SizesArrayArg,
11266                                      MapTypesArrayArg,
11267                                      MapNamesArrayArg,
11268                                      MappersArrayArg};
11269     CGF.EmitRuntimeCall(
11270         OMPBuilder.getOrCreateRuntimeFunction(
11271             CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
11272         OffloadingArgs);
11273 
11274     // If device pointer privatization is required, emit the body of the region
11275     // here. It will have to be duplicated: with and without privatization.
11276     if (!Info.CaptureDeviceAddrMap.empty())
11277       CodeGen(CGF);
11278   };
11279 
11280   // Generate code for the closing of the data region.
11281   auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
11282                                                 PrePostActionTy &) {
11283     assert(Info.isValid() && "Invalid data environment closing arguments.");
11284 
11285     llvm::Value *BasePointersArrayArg = nullptr;
11286     llvm::Value *PointersArrayArg = nullptr;
11287     llvm::Value *SizesArrayArg = nullptr;
11288     llvm::Value *MapTypesArrayArg = nullptr;
11289     llvm::Value *MapNamesArrayArg = nullptr;
11290     llvm::Value *MappersArrayArg = nullptr;
11291     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11292                                  SizesArrayArg, MapTypesArrayArg,
11293                                  MapNamesArrayArg, MappersArrayArg, Info,
11294                                  {/*ForEndCall=*/true});
11295 
11296     // Emit device ID if any.
11297     llvm::Value *DeviceID = nullptr;
11298     if (Device) {
11299       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11300                                            CGF.Int64Ty, /*isSigned=*/true);
11301     } else {
11302       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11303     }
11304 
11305     // Emit the number of elements in the offloading arrays.
11306     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11307 
11308     // Source location for the ident struct
11309     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11310 
11311     llvm::Value *OffloadingArgs[] = {RTLoc,
11312                                      DeviceID,
11313                                      PointerNum,
11314                                      BasePointersArrayArg,
11315                                      PointersArrayArg,
11316                                      SizesArrayArg,
11317                                      MapTypesArrayArg,
11318                                      MapNamesArrayArg,
11319                                      MappersArrayArg};
11320     CGF.EmitRuntimeCall(
11321         OMPBuilder.getOrCreateRuntimeFunction(
11322             CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
11323         OffloadingArgs);
11324   };
11325 
11326   // If we need device pointer privatization, we need to emit the body of the
11327   // region with no privatization in the 'else' branch of the conditional.
11328   // Otherwise, we don't have to do anything.
11329   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
11330                                                          PrePostActionTy &) {
11331     if (!Info.CaptureDeviceAddrMap.empty()) {
11332       CodeGen.setAction(NoPrivAction);
11333       CodeGen(CGF);
11334     }
11335   };
11336 
11337   // We don't have to do anything to close the region if the if clause evaluates
11338   // to false.
11339   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
11340 
11341   if (IfCond) {
11342     emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
11343   } else {
11344     RegionCodeGenTy RCG(BeginThenGen);
11345     RCG(CGF);
11346   }
11347 
11348   // If we don't require privatization of device pointers, we emit the body in
11349   // between the runtime calls. This avoids duplicating the body code.
11350   if (Info.CaptureDeviceAddrMap.empty()) {
11351     CodeGen.setAction(NoPrivAction);
11352     CodeGen(CGF);
11353   }
11354 
11355   if (IfCond) {
11356     emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
11357   } else {
11358     RegionCodeGenTy RCG(EndThenGen);
11359     RCG(CGF);
11360   }
11361 }
11362 
11363 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
11364     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11365     const Expr *Device) {
11366   if (!CGF.HaveInsertPoint())
11367     return;
11368 
11369   assert((isa<OMPTargetEnterDataDirective>(D) ||
11370           isa<OMPTargetExitDataDirective>(D) ||
11371           isa<OMPTargetUpdateDirective>(D)) &&
11372          "Expecting either target enter, exit data, or update directives.");
11373 
11374   CodeGenFunction::OMPTargetDataInfo InputInfo;
11375   llvm::Value *MapTypesArray = nullptr;
11376   llvm::Value *MapNamesArray = nullptr;
11377   // Generate the code for the opening of the data environment.
11378   auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
11379                     &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
11380     // Emit device ID if any.
11381     llvm::Value *DeviceID = nullptr;
11382     if (Device) {
11383       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11384                                            CGF.Int64Ty, /*isSigned=*/true);
11385     } else {
11386       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11387     }
11388 
11389     // Emit the number of elements in the offloading arrays.
11390     llvm::Constant *PointerNum =
11391         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
11392 
11393     // Source location for the ident struct
11394     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11395 
11396     llvm::Value *OffloadingArgs[] = {RTLoc,
11397                                      DeviceID,
11398                                      PointerNum,
11399                                      InputInfo.BasePointersArray.getPointer(),
11400                                      InputInfo.PointersArray.getPointer(),
11401                                      InputInfo.SizesArray.getPointer(),
11402                                      MapTypesArray,
11403                                      MapNamesArray,
11404                                      InputInfo.MappersArray.getPointer()};
11405 
11406     // Select the right runtime function call for each standalone
11407     // directive.
11408     const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
11409     RuntimeFunction RTLFn;
11410     switch (D.getDirectiveKind()) {
11411     case OMPD_target_enter_data:
11412       RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
11413                         : OMPRTL___tgt_target_data_begin_mapper;
11414       break;
11415     case OMPD_target_exit_data:
11416       RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
11417                         : OMPRTL___tgt_target_data_end_mapper;
11418       break;
11419     case OMPD_target_update:
11420       RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
11421                         : OMPRTL___tgt_target_data_update_mapper;
11422       break;
11423     case OMPD_parallel:
11424     case OMPD_for:
11425     case OMPD_parallel_for:
11426     case OMPD_parallel_master:
11427     case OMPD_parallel_sections:
11428     case OMPD_for_simd:
11429     case OMPD_parallel_for_simd:
11430     case OMPD_cancel:
11431     case OMPD_cancellation_point:
11432     case OMPD_ordered:
11433     case OMPD_threadprivate:
11434     case OMPD_allocate:
11435     case OMPD_task:
11436     case OMPD_simd:
11437     case OMPD_tile:
11438     case OMPD_unroll:
11439     case OMPD_sections:
11440     case OMPD_section:
11441     case OMPD_single:
11442     case OMPD_master:
11443     case OMPD_critical:
11444     case OMPD_taskyield:
11445     case OMPD_barrier:
11446     case OMPD_taskwait:
11447     case OMPD_taskgroup:
11448     case OMPD_atomic:
11449     case OMPD_flush:
11450     case OMPD_depobj:
11451     case OMPD_scan:
11452     case OMPD_teams:
11453     case OMPD_target_data:
11454     case OMPD_distribute:
11455     case OMPD_distribute_simd:
11456     case OMPD_distribute_parallel_for:
11457     case OMPD_distribute_parallel_for_simd:
11458     case OMPD_teams_distribute:
11459     case OMPD_teams_distribute_simd:
11460     case OMPD_teams_distribute_parallel_for:
11461     case OMPD_teams_distribute_parallel_for_simd:
11462     case OMPD_declare_simd:
11463     case OMPD_declare_variant:
11464     case OMPD_begin_declare_variant:
11465     case OMPD_end_declare_variant:
11466     case OMPD_declare_target:
11467     case OMPD_end_declare_target:
11468     case OMPD_declare_reduction:
11469     case OMPD_declare_mapper:
11470     case OMPD_taskloop:
11471     case OMPD_taskloop_simd:
11472     case OMPD_master_taskloop:
11473     case OMPD_master_taskloop_simd:
11474     case OMPD_parallel_master_taskloop:
11475     case OMPD_parallel_master_taskloop_simd:
11476     case OMPD_target:
11477     case OMPD_target_simd:
11478     case OMPD_target_teams_distribute:
11479     case OMPD_target_teams_distribute_simd:
11480     case OMPD_target_teams_distribute_parallel_for:
11481     case OMPD_target_teams_distribute_parallel_for_simd:
11482     case OMPD_target_teams:
11483     case OMPD_target_parallel:
11484     case OMPD_target_parallel_for:
11485     case OMPD_target_parallel_for_simd:
11486     case OMPD_requires:
11487     case OMPD_metadirective:
11488     case OMPD_unknown:
11489     default:
11490       llvm_unreachable("Unexpected standalone target data directive.");
11491       break;
11492     }
11493     CGF.EmitRuntimeCall(
11494         OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
11495         OffloadingArgs);
11496   };
11497 
11498   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
11499                           &MapNamesArray](CodeGenFunction &CGF,
11500                                           PrePostActionTy &) {
11501     // Fill up the arrays with all the mapped variables.
11502     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11503 
11504     // Get map clause information.
11505     MappableExprsHandler MEHandler(D, CGF);
11506     MEHandler.generateAllInfo(CombinedInfo);
11507 
11508     TargetDataInfo Info;
11509     // Fill up the arrays and create the arguments.
11510     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11511                          /*IsNonContiguous=*/true);
11512     bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
11513                              D.hasClausesOfKind<OMPNowaitClause>();
11514     emitOffloadingArraysArgument(
11515         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
11516         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
11517         {/*ForEndCall=*/false});
11518     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
11519     InputInfo.BasePointersArray =
11520         Address::deprecated(Info.BasePointersArray, CGM.getPointerAlign());
11521     InputInfo.PointersArray =
11522         Address::deprecated(Info.PointersArray, CGM.getPointerAlign());
11523     InputInfo.SizesArray =
11524         Address::deprecated(Info.SizesArray, CGM.getPointerAlign());
11525     InputInfo.MappersArray =
11526         Address::deprecated(Info.MappersArray, CGM.getPointerAlign());
11527     MapTypesArray = Info.MapTypesArray;
11528     MapNamesArray = Info.MapNamesArray;
11529     if (RequiresOuterTask)
11530       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
11531     else
11532       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
11533   };
11534 
11535   if (IfCond) {
11536     emitIfClause(CGF, IfCond, TargetThenGen,
11537                  [](CodeGenFunction &CGF, PrePostActionTy &) {});
11538   } else {
11539     RegionCodeGenTy ThenRCG(TargetThenGen);
11540     ThenRCG(CGF);
11541   }
11542 }
11543 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  ///
  /// The enumerator order is observable: a value-initialized ParamKindTy
  /// (e.g. an argument spelled `{}`) is the first enumerator,
  /// LinearWithVarStride. Do not reorder without auditing such uses.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Classification of the parameter; parameters are Vector unless a clause
    /// says otherwise.
    ParamKindTy Kind = Vector;
    /// Value streamed after the kind letter by the name manglers for Linear
    /// and LinearWithVarStride parameters (for Linear it is omitted when it
    /// equals 1). NOTE(review): presumably the constant stride for Linear and
    /// the position of the stride argument for LinearWithVarStride - confirm
    /// against the code that fills this in.
    llvm::APSInt StrideOrArg;
    /// Alignment streamed after 'a' by the name manglers; a zero value means
    /// no alignment is emitted.
    llvm::APSInt Alignment;
  };
} // namespace
11554 
11555 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11556                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11557   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11558   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11559   // of that clause. The VLEN value must be power of 2.
11560   // In other case the notion of the function`s "characteristic data type" (CDT)
11561   // is used to compute the vector length.
11562   // CDT is defined in the following order:
11563   //   a) For non-void function, the CDT is the return type.
11564   //   b) If the function has any non-uniform, non-linear parameters, then the
11565   //   CDT is the type of the first such parameter.
11566   //   c) If the CDT determined by a) or b) above is struct, union, or class
11567   //   type which is pass-by-value (except for the type that maps to the
11568   //   built-in complex data type), the characteristic data type is int.
11569   //   d) If none of the above three cases is applicable, the CDT is int.
11570   // The VLEN is then determined based on the CDT and the size of vector
11571   // register of that ISA for which current vector version is generated. The
11572   // VLEN is computed using the formula below:
11573   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11574   // where vector register size specified in section 3.2.1 Registers and the
11575   // Stack Frame of original AMD64 ABI document.
11576   QualType RetType = FD->getReturnType();
11577   if (RetType.isNull())
11578     return 0;
11579   ASTContext &C = FD->getASTContext();
11580   QualType CDT;
11581   if (!RetType.isNull() && !RetType->isVoidType()) {
11582     CDT = RetType;
11583   } else {
11584     unsigned Offset = 0;
11585     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11586       if (ParamAttrs[Offset].Kind == Vector)
11587         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11588       ++Offset;
11589     }
11590     if (CDT.isNull()) {
11591       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11592         if (ParamAttrs[I + Offset].Kind == Vector) {
11593           CDT = FD->getParamDecl(I)->getType();
11594           break;
11595         }
11596       }
11597     }
11598   }
11599   if (CDT.isNull())
11600     CDT = C.IntTy;
11601   CDT = CDT->getCanonicalTypeUnqualified();
11602   if (CDT->isRecordType() || CDT->isUnionType())
11603     CDT = C.IntTy;
11604   return C.getTypeSize(CDT);
11605 }
11606 
11607 static void
11608 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11609                            const llvm::APSInt &VLENVal,
11610                            ArrayRef<ParamAttrTy> ParamAttrs,
11611                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11612   struct ISADataTy {
11613     char ISA;
11614     unsigned VecRegSize;
11615   };
11616   ISADataTy ISAData[] = {
11617       {
11618           'b', 128
11619       }, // SSE
11620       {
11621           'c', 256
11622       }, // AVX
11623       {
11624           'd', 256
11625       }, // AVX2
11626       {
11627           'e', 512
11628       }, // AVX512
11629   };
11630   llvm::SmallVector<char, 2> Masked;
11631   switch (State) {
11632   case OMPDeclareSimdDeclAttr::BS_Undefined:
11633     Masked.push_back('N');
11634     Masked.push_back('M');
11635     break;
11636   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11637     Masked.push_back('N');
11638     break;
11639   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11640     Masked.push_back('M');
11641     break;
11642   }
11643   for (char Mask : Masked) {
11644     for (const ISADataTy &Data : ISAData) {
11645       SmallString<256> Buffer;
11646       llvm::raw_svector_ostream Out(Buffer);
11647       Out << "_ZGV" << Data.ISA << Mask;
11648       if (!VLENVal) {
11649         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11650         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11651         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11652       } else {
11653         Out << VLENVal;
11654       }
11655       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11656         switch (ParamAttr.Kind){
11657         case LinearWithVarStride:
11658           Out << 's' << ParamAttr.StrideOrArg;
11659           break;
11660         case Linear:
11661           Out << 'l';
11662           if (ParamAttr.StrideOrArg != 1)
11663             Out << ParamAttr.StrideOrArg;
11664           break;
11665         case Uniform:
11666           Out << 'u';
11667           break;
11668         case Vector:
11669           Out << 'v';
11670           break;
11671         }
11672         if (!!ParamAttr.Alignment)
11673           Out << 'a' << ParamAttr.Alignment;
11674       }
11675       Out << '_' << Fn->getName();
11676       Fn->addFnAttr(Out.str());
11677     }
11678   }
11679 }
11680 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined in
// the "Vector Function ABI specifications for AArch64" (AAVFABI),
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11686 
11687 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11688 ///
11689 /// TODO: Need to implement the behavior for reference marked with a
11690 /// var or no linear modifiers (1.b in the section). For this, we
11691 /// need to extend ParamKindTy to support the linear modifiers.
11692 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11693   QT = QT.getCanonicalType();
11694 
11695   if (QT->isVoidType())
11696     return false;
11697 
11698   if (Kind == ParamKindTy::Uniform)
11699     return false;
11700 
11701   if (Kind == ParamKindTy::Linear)
11702     return false;
11703 
11704   // TODO: Handle linear references with modifiers
11705 
11706   if (Kind == ParamKindTy::LinearWithVarStride)
11707     return false;
11708 
11709   return true;
11710 }
11711 
11712 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11713 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11714   QT = QT.getCanonicalType();
11715   unsigned Size = C.getTypeSize(QT);
11716 
11717   // Only scalars and complex within 16 bytes wide set PVB to true.
11718   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11719     return false;
11720 
11721   if (QT->isFloatingType())
11722     return true;
11723 
11724   if (QT->isIntegerType())
11725     return true;
11726 
11727   if (QT->isPointerType())
11728     return true;
11729 
11730   // TODO: Add support for complex types (section 3.1.2, item 2).
11731 
11732   return false;
11733 }
11734 
11735 /// Computes the lane size (LS) of a return type or of an input parameter,
11736 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11737 /// TODO: Add support for references, section 3.2.1, item 1.
11738 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11739   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11740     QualType PTy = QT.getCanonicalType()->getPointeeType();
11741     if (getAArch64PBV(PTy, C))
11742       return C.getTypeSize(PTy);
11743   }
11744   if (getAArch64PBV(QT, C))
11745     return C.getTypeSize(QT);
11746 
11747   return C.getTypeSize(C.getUIntPtrType());
11748 }
11749 
11750 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11751 // signature of the scalar function, as defined in 3.2.2 of the
11752 // AAVFABI.
11753 static std::tuple<unsigned, unsigned, bool>
11754 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11755   QualType RetType = FD->getReturnType().getCanonicalType();
11756 
11757   ASTContext &C = FD->getASTContext();
11758 
11759   bool OutputBecomesInput = false;
11760 
11761   llvm::SmallVector<unsigned, 8> Sizes;
11762   if (!RetType->isVoidType()) {
11763     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11764     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11765       OutputBecomesInput = true;
11766   }
11767   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11768     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11769     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11770   }
11771 
11772   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11773   // The LS of a function parameter / return value can only be a power
11774   // of 2, starting from 8 bits, up to 128.
11775   assert(llvm::all_of(Sizes,
11776                       [](unsigned Size) {
11777                         return Size == 8 || Size == 16 || Size == 32 ||
11778                                Size == 64 || Size == 128;
11779                       }) &&
11780          "Invalid size");
11781 
11782   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11783                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11784                          OutputBecomesInput);
11785 }
11786 
11787 /// Mangle the parameter part of the vector function name according to
11788 /// their OpenMP classification. The mangling function is defined in
11789 /// section 3.5 of the AAVFABI.
11790 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11791   SmallString<256> Buffer;
11792   llvm::raw_svector_ostream Out(Buffer);
11793   for (const auto &ParamAttr : ParamAttrs) {
11794     switch (ParamAttr.Kind) {
11795     case LinearWithVarStride:
11796       Out << "ls" << ParamAttr.StrideOrArg;
11797       break;
11798     case Linear:
11799       Out << 'l';
11800       // Don't print the step value if it is not present or if it is
11801       // equal to 1.
11802       if (ParamAttr.StrideOrArg != 1)
11803         Out << ParamAttr.StrideOrArg;
11804       break;
11805     case Uniform:
11806       Out << 'u';
11807       break;
11808     case Vector:
11809       Out << 'v';
11810       break;
11811     }
11812 
11813     if (!!ParamAttr.Alignment)
11814       Out << 'a' << ParamAttr.Alignment;
11815   }
11816 
11817   return std::string(Out.str());
11818 }
11819 
11820 // Function used to add the attribute. The parameter `VLEN` is
11821 // templated to allow the use of "x" when targeting scalable functions
11822 // for SVE.
11823 template <typename T>
11824 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11825                                  char ISA, StringRef ParSeq,
11826                                  StringRef MangledName, bool OutputBecomesInput,
11827                                  llvm::Function *Fn) {
11828   SmallString<256> Buffer;
11829   llvm::raw_svector_ostream Out(Buffer);
11830   Out << Prefix << ISA << LMask << VLEN;
11831   if (OutputBecomesInput)
11832     Out << "v";
11833   Out << ParSeq << "_" << MangledName;
11834   Fn->addFnAttr(Out.str());
11835 }
11836 
11837 // Helper function to generate the Advanced SIMD names depending on
11838 // the value of the NDS when simdlen is not present.
11839 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11840                                       StringRef Prefix, char ISA,
11841                                       StringRef ParSeq, StringRef MangledName,
11842                                       bool OutputBecomesInput,
11843                                       llvm::Function *Fn) {
11844   switch (NDS) {
11845   case 8:
11846     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11847                          OutputBecomesInput, Fn);
11848     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11849                          OutputBecomesInput, Fn);
11850     break;
11851   case 16:
11852     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11853                          OutputBecomesInput, Fn);
11854     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11855                          OutputBecomesInput, Fn);
11856     break;
11857   case 32:
11858     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11859                          OutputBecomesInput, Fn);
11860     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11861                          OutputBecomesInput, Fn);
11862     break;
11863   case 64:
11864   case 128:
11865     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11866                          OutputBecomesInput, Fn);
11867     break;
11868   default:
11869     llvm_unreachable("Scalar type is too wide.");
11870   }
11871 }
11872 
11873 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11874 static void emitAArch64DeclareSimdFunction(
11875     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11876     ArrayRef<ParamAttrTy> ParamAttrs,
11877     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11878     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11879 
11880   // Get basic data for building the vector signature.
11881   const auto Data = getNDSWDS(FD, ParamAttrs);
11882   const unsigned NDS = std::get<0>(Data);
11883   const unsigned WDS = std::get<1>(Data);
11884   const bool OutputBecomesInput = std::get<2>(Data);
11885 
11886   // Check the values provided via `simdlen` by the user.
11887   // 1. A `simdlen(1)` doesn't produce vector signatures,
11888   if (UserVLEN == 1) {
11889     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11890         DiagnosticsEngine::Warning,
11891         "The clause simdlen(1) has no effect when targeting aarch64.");
11892     CGM.getDiags().Report(SLoc, DiagID);
11893     return;
11894   }
11895 
11896   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11897   // Advanced SIMD output.
11898   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11899     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11900         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11901                                     "power of 2 when targeting Advanced SIMD.");
11902     CGM.getDiags().Report(SLoc, DiagID);
11903     return;
11904   }
11905 
11906   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
11907   // limits.
11908   if (ISA == 's' && UserVLEN != 0) {
11909     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11910       unsigned DiagID = CGM.getDiags().getCustomDiagID(
11911           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11912                                       "lanes in the architectural constraints "
11913                                       "for SVE (min is 128-bit, max is "
11914                                       "2048-bit, by steps of 128-bit)");
11915       CGM.getDiags().Report(SLoc, DiagID) << WDS;
11916       return;
11917     }
11918   }
11919 
11920   // Sort out parameter sequence.
11921   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11922   StringRef Prefix = "_ZGV";
11923   // Generate simdlen from user input (if any).
11924   if (UserVLEN) {
11925     if (ISA == 's') {
11926       // SVE generates only a masked function.
11927       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11928                            OutputBecomesInput, Fn);
11929     } else {
11930       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11931       // Advanced SIMD generates one or two functions, depending on
11932       // the `[not]inbranch` clause.
11933       switch (State) {
11934       case OMPDeclareSimdDeclAttr::BS_Undefined:
11935         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11936                              OutputBecomesInput, Fn);
11937         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11938                              OutputBecomesInput, Fn);
11939         break;
11940       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11941         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11942                              OutputBecomesInput, Fn);
11943         break;
11944       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11945         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11946                              OutputBecomesInput, Fn);
11947         break;
11948       }
11949     }
11950   } else {
11951     // If no user simdlen is provided, follow the AAVFABI rules for
11952     // generating the vector length.
11953     if (ISA == 's') {
11954       // SVE, section 3.4.1, item 1.
11955       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11956                            OutputBecomesInput, Fn);
11957     } else {
11958       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11959       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11960       // two vector names depending on the use of the clause
11961       // `[not]inbranch`.
11962       switch (State) {
11963       case OMPDeclareSimdDeclAttr::BS_Undefined:
11964         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11965                                   OutputBecomesInput, Fn);
11966         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11967                                   OutputBecomesInput, Fn);
11968         break;
11969       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11970         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11971                                   OutputBecomesInput, Fn);
11972         break;
11973       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11974         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11975                                   OutputBecomesInput, Fn);
11976         break;
11977       }
11978     }
11979   }
11980 }
11981 
11982 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11983                                               llvm::Function *Fn) {
11984   ASTContext &C = CGM.getContext();
11985   FD = FD->getMostRecentDecl();
11986   // Map params to their positions in function decl.
11987   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11988   if (isa<CXXMethodDecl>(FD))
11989     ParamPositions.try_emplace(FD, 0);
11990   unsigned ParamPos = ParamPositions.size();
11991   for (const ParmVarDecl *P : FD->parameters()) {
11992     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11993     ++ParamPos;
11994   }
11995   while (FD) {
11996     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11997       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11998       // Mark uniform parameters.
11999       for (const Expr *E : Attr->uniforms()) {
12000         E = E->IgnoreParenImpCasts();
12001         unsigned Pos;
12002         if (isa<CXXThisExpr>(E)) {
12003           Pos = ParamPositions[FD];
12004         } else {
12005           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12006                                 ->getCanonicalDecl();
12007           Pos = ParamPositions[PVD];
12008         }
12009         ParamAttrs[Pos].Kind = Uniform;
12010       }
12011       // Get alignment info.
12012       auto *NI = Attr->alignments_begin();
12013       for (const Expr *E : Attr->aligneds()) {
12014         E = E->IgnoreParenImpCasts();
12015         unsigned Pos;
12016         QualType ParmTy;
12017         if (isa<CXXThisExpr>(E)) {
12018           Pos = ParamPositions[FD];
12019           ParmTy = E->getType();
12020         } else {
12021           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12022                                 ->getCanonicalDecl();
12023           Pos = ParamPositions[PVD];
12024           ParmTy = PVD->getType();
12025         }
12026         ParamAttrs[Pos].Alignment =
12027             (*NI)
12028                 ? (*NI)->EvaluateKnownConstInt(C)
12029                 : llvm::APSInt::getUnsigned(
12030                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
12031                           .getQuantity());
12032         ++NI;
12033       }
12034       // Mark linear parameters.
12035       auto *SI = Attr->steps_begin();
12036       auto *MI = Attr->modifiers_begin();
12037       for (const Expr *E : Attr->linears()) {
12038         E = E->IgnoreParenImpCasts();
12039         unsigned Pos;
12040         // Rescaling factor needed to compute the linear parameter
12041         // value in the mangled name.
12042         unsigned PtrRescalingFactor = 1;
12043         if (isa<CXXThisExpr>(E)) {
12044           Pos = ParamPositions[FD];
12045         } else {
12046           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12047                                 ->getCanonicalDecl();
12048           Pos = ParamPositions[PVD];
12049           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
12050             PtrRescalingFactor = CGM.getContext()
12051                                      .getTypeSizeInChars(P->getPointeeType())
12052                                      .getQuantity();
12053         }
12054         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
12055         ParamAttr.Kind = Linear;
12056         // Assuming a stride of 1, for `linear` without modifiers.
12057         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
12058         if (*SI) {
12059           Expr::EvalResult Result;
12060           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
12061             if (const auto *DRE =
12062                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
12063               if (const auto *StridePVD =
12064                       dyn_cast<ParmVarDecl>(DRE->getDecl())) {
12065                 ParamAttr.Kind = LinearWithVarStride;
12066                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
12067                     ParamPositions[StridePVD->getCanonicalDecl()]);
12068               }
12069             }
12070           } else {
12071             ParamAttr.StrideOrArg = Result.Val.getInt();
12072           }
12073         }
12074         // If we are using a linear clause on a pointer, we need to
12075         // rescale the value of linear_step with the byte size of the
12076         // pointee type.
12077         if (Linear == ParamAttr.Kind)
12078           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
12079         ++SI;
12080         ++MI;
12081       }
12082       llvm::APSInt VLENVal;
12083       SourceLocation ExprLoc;
12084       const Expr *VLENExpr = Attr->getSimdlen();
12085       if (VLENExpr) {
12086         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
12087         ExprLoc = VLENExpr->getExprLoc();
12088       }
12089       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
12090       if (CGM.getTriple().isX86()) {
12091         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
12092       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
12093         unsigned VLEN = VLENVal.getExtValue();
12094         StringRef MangledName = Fn->getName();
12095         if (CGM.getTarget().hasFeature("sve"))
12096           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12097                                          MangledName, 's', 128, Fn, ExprLoc);
12098         if (CGM.getTarget().hasFeature("neon"))
12099           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12100                                          MangledName, 'n', 128, Fn, ExprLoc);
12101       }
12102     }
12103     FD = FD->getPreviousDecl();
12104   }
12105 }
12106 
12107 namespace {
12108 /// Cleanup action for doacross support.
12109 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
12110 public:
12111   static const int DoacrossFinArgs = 2;
12112 
12113 private:
12114   llvm::FunctionCallee RTLFn;
12115   llvm::Value *Args[DoacrossFinArgs];
12116 
12117 public:
12118   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
12119                     ArrayRef<llvm::Value *> CallArgs)
12120       : RTLFn(RTLFn) {
12121     assert(CallArgs.size() == DoacrossFinArgs);
12122     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
12123   }
12124   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12125     if (!CGF.HaveInsertPoint())
12126       return;
12127     CGF.EmitRuntimeCall(RTLFn, Args);
12128   }
12129 };
12130 } // namespace
12131 
12132 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12133                                        const OMPLoopDirective &D,
12134                                        ArrayRef<Expr *> NumIterations) {
12135   if (!CGF.HaveInsertPoint())
12136     return;
12137 
12138   ASTContext &C = CGM.getContext();
12139   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
12140   RecordDecl *RD;
12141   if (KmpDimTy.isNull()) {
12142     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
12143     //  kmp_int64 lo; // lower
12144     //  kmp_int64 up; // upper
12145     //  kmp_int64 st; // stride
12146     // };
12147     RD = C.buildImplicitRecord("kmp_dim");
12148     RD->startDefinition();
12149     addFieldToRecordDecl(C, RD, Int64Ty);
12150     addFieldToRecordDecl(C, RD, Int64Ty);
12151     addFieldToRecordDecl(C, RD, Int64Ty);
12152     RD->completeDefinition();
12153     KmpDimTy = C.getRecordType(RD);
12154   } else {
12155     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
12156   }
12157   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
12158   QualType ArrayTy =
12159       C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
12160 
12161   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
12162   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
12163   enum { LowerFD = 0, UpperFD, StrideFD };
12164   // Fill dims with data.
12165   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
12166     LValue DimsLVal = CGF.MakeAddrLValue(
12167         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
12168     // dims.upper = num_iterations;
12169     LValue UpperLVal = CGF.EmitLValueForField(
12170         DimsLVal, *std::next(RD->field_begin(), UpperFD));
12171     llvm::Value *NumIterVal = CGF.EmitScalarConversion(
12172         CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
12173         Int64Ty, NumIterations[I]->getExprLoc());
12174     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
12175     // dims.stride = 1;
12176     LValue StrideLVal = CGF.EmitLValueForField(
12177         DimsLVal, *std::next(RD->field_begin(), StrideFD));
12178     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
12179                           StrideLVal);
12180   }
12181 
12182   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
12183   // kmp_int32 num_dims, struct kmp_dim * dims);
12184   llvm::Value *Args[] = {
12185       emitUpdateLocation(CGF, D.getBeginLoc()),
12186       getThreadID(CGF, D.getBeginLoc()),
12187       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
12188       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12189           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
12190           CGM.VoidPtrTy)};
12191 
12192   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12193       CGM.getModule(), OMPRTL___kmpc_doacross_init);
12194   CGF.EmitRuntimeCall(RTLFn, Args);
12195   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
12196       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
12197   llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12198       CGM.getModule(), OMPRTL___kmpc_doacross_fini);
12199   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
12200                                              llvm::makeArrayRef(FiniArgs));
12201 }
12202 
12203 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12204                                           const OMPDependClause *C) {
12205   QualType Int64Ty =
12206       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
12207   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
12208   QualType ArrayTy = CGM.getContext().getConstantArrayType(
12209       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
12210   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
12211   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
12212     const Expr *CounterVal = C->getLoopData(I);
12213     assert(CounterVal);
12214     llvm::Value *CntVal = CGF.EmitScalarConversion(
12215         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
12216         CounterVal->getExprLoc());
12217     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
12218                           /*Volatile=*/false, Int64Ty);
12219   }
12220   llvm::Value *Args[] = {
12221       emitUpdateLocation(CGF, C->getBeginLoc()),
12222       getThreadID(CGF, C->getBeginLoc()),
12223       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
12224   llvm::FunctionCallee RTLFn;
12225   if (C->getDependencyKind() == OMPC_DEPEND_source) {
12226     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12227                                                   OMPRTL___kmpc_doacross_post);
12228   } else {
12229     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
12230     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12231                                                   OMPRTL___kmpc_doacross_wait);
12232   }
12233   CGF.EmitRuntimeCall(RTLFn, Args);
12234 }
12235 
12236 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12237                                llvm::FunctionCallee Callee,
12238                                ArrayRef<llvm::Value *> Args) const {
12239   assert(Loc.isValid() && "Outlined function call location must be valid.");
12240   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
12241 
12242   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12243     if (Fn->doesNotThrow()) {
12244       CGF.EmitNounwindRuntimeCall(Fn, Args);
12245       return;
12246     }
12247   }
12248   CGF.EmitRuntimeCall(Callee, Args);
12249 }
12250 
12251 void CGOpenMPRuntime::emitOutlinedFunctionCall(
12252     CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
12253     ArrayRef<llvm::Value *> Args) const {
12254   emitCall(CGF, Loc, OutlinedFn, Args);
12255 }
12256 
12257 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12258   if (const auto *FD = dyn_cast<FunctionDecl>(D))
12259     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12260       HasEmittedDeclareTargetRegion = true;
12261 }
12262 
12263 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
12264                                              const VarDecl *NativeParam,
12265                                              const VarDecl *TargetParam) const {
12266   return CGF.GetAddrOfLocalVar(NativeParam);
12267 }
12268 
12269 /// Return allocator value from expression, or return a null allocator (default
12270 /// when no allocator specified).
12271 static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
12272                                     const Expr *Allocator) {
12273   llvm::Value *AllocVal;
12274   if (Allocator) {
12275     AllocVal = CGF.EmitScalarExpr(Allocator);
12276     // According to the standard, the original allocator type is a enum
12277     // (integer). Convert to pointer type, if required.
12278     AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
12279                                         CGF.getContext().VoidPtrTy,
12280                                         Allocator->getExprLoc());
12281   } else {
12282     // If no allocator specified, it defaults to the null allocator.
12283     AllocVal = llvm::Constant::getNullValue(
12284         CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
12285   }
12286   return AllocVal;
12287 }
12288 
12289 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
12290                                                    const VarDecl *VD) {
12291   if (!VD)
12292     return Address::invalid();
12293   Address UntiedAddr = Address::invalid();
12294   Address UntiedRealAddr = Address::invalid();
12295   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12296   if (It != FunctionToUntiedTaskStackMap.end()) {
12297     const UntiedLocalVarsAddressesMap &UntiedData =
12298         UntiedLocalVarsStack[It->second];
12299     auto I = UntiedData.find(VD);
12300     if (I != UntiedData.end()) {
12301       UntiedAddr = I->second.first;
12302       UntiedRealAddr = I->second.second;
12303     }
12304   }
12305   const VarDecl *CVD = VD->getCanonicalDecl();
12306   if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
12307     // Use the default allocation.
12308     if (!isAllocatableDecl(VD))
12309       return UntiedAddr;
12310     llvm::Value *Size;
12311     CharUnits Align = CGM.getContext().getDeclAlign(CVD);
12312     if (CVD->getType()->isVariablyModifiedType()) {
12313       Size = CGF.getTypeSize(CVD->getType());
12314       // Align the size: ((size + align - 1) / align) * align
12315       Size = CGF.Builder.CreateNUWAdd(
12316           Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
12317       Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
12318       Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
12319     } else {
12320       CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
12321       Size = CGM.getSize(Sz.alignTo(Align));
12322     }
12323     llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
12324     const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
12325     const Expr *Allocator = AA->getAllocator();
12326     llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
12327     llvm::Value *Alignment =
12328         AA->getAlignment()
12329             ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(AA->getAlignment()),
12330                                         CGM.SizeTy, /*isSigned=*/false)
12331             : nullptr;
12332     SmallVector<llvm::Value *, 4> Args;
12333     Args.push_back(ThreadID);
12334     if (Alignment)
12335       Args.push_back(Alignment);
12336     Args.push_back(Size);
12337     Args.push_back(AllocVal);
12338     llvm::omp::RuntimeFunction FnID =
12339         Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
12340     llvm::Value *Addr = CGF.EmitRuntimeCall(
12341         OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
12342         getName({CVD->getName(), ".void.addr"}));
12343     llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12344         CGM.getModule(), OMPRTL___kmpc_free);
12345     QualType Ty = CGM.getContext().getPointerType(CVD->getType());
12346     Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12347         Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
12348     if (UntiedAddr.isValid())
12349       CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
12350 
12351     // Cleanup action for allocate support.
12352     class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
12353       llvm::FunctionCallee RTLFn;
12354       SourceLocation::UIntTy LocEncoding;
12355       Address Addr;
12356       const Expr *AllocExpr;
12357 
12358     public:
12359       OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
12360                            SourceLocation::UIntTy LocEncoding, Address Addr,
12361                            const Expr *AllocExpr)
12362           : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
12363             AllocExpr(AllocExpr) {}
12364       void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12365         if (!CGF.HaveInsertPoint())
12366           return;
12367         llvm::Value *Args[3];
12368         Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
12369             CGF, SourceLocation::getFromRawEncoding(LocEncoding));
12370         Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12371             Addr.getPointer(), CGF.VoidPtrTy);
12372         llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
12373         Args[2] = AllocVal;
12374         CGF.EmitRuntimeCall(RTLFn, Args);
12375       }
12376     };
12377     Address VDAddr = UntiedRealAddr.isValid()
12378                          ? UntiedRealAddr
12379                          : Address::deprecated(Addr, Align);
12380     CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
12381         NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
12382         VDAddr, Allocator);
12383     if (UntiedRealAddr.isValid())
12384       if (auto *Region =
12385               dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
12386         Region->emitUntiedSwitch(CGF);
12387     return VDAddr;
12388   }
12389   return UntiedAddr;
12390 }
12391 
12392 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12393                                              const VarDecl *VD) const {
12394   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12395   if (It == FunctionToUntiedTaskStackMap.end())
12396     return false;
12397   return UntiedLocalVarsStack[It->second].count(VD) > 0;
12398 }
12399 
12400 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12401     CodeGenModule &CGM, const OMPLoopDirective &S)
12402     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12403   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12404   if (!NeedToPush)
12405     return;
12406   NontemporalDeclsSet &DS =
12407       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12408   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12409     for (const Stmt *Ref : C->private_refs()) {
12410       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12411       const ValueDecl *VD;
12412       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12413         VD = DRE->getDecl();
12414       } else {
12415         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12416         assert((ME->isImplicitCXXThis() ||
12417                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12418                "Expected member of current class.");
12419         VD = ME->getMemberDecl();
12420       }
12421       DS.insert(VD);
12422     }
12423   }
12424 }
12425 
12426 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12427   if (!NeedToPush)
12428     return;
12429   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12430 }
12431 
12432 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12433     CodeGenFunction &CGF,
12434     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12435                           std::pair<Address, Address>> &LocalVars)
12436     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12437   if (!NeedToPush)
12438     return;
12439   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12440       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12441   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12442 }
12443 
12444 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12445   if (!NeedToPush)
12446     return;
12447   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12448 }
12449 
12450 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12451   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12452 
12453   return llvm::any_of(
12454       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12455       [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
12456 }
12457 
12458 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12459     const OMPExecutableDirective &S,
12460     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12461     const {
12462   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12463   // Vars in target/task regions must be excluded completely.
12464   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12465       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12466     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12467     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12468     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12469     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12470       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12471         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12472     }
12473   }
12474   // Exclude vars in private clauses.
12475   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12476     for (const Expr *Ref : C->varlists()) {
12477       if (!Ref->getType()->isScalarType())
12478         continue;
12479       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12480       if (!DRE)
12481         continue;
12482       NeedToCheckForLPCs.insert(DRE->getDecl());
12483     }
12484   }
12485   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12486     for (const Expr *Ref : C->varlists()) {
12487       if (!Ref->getType()->isScalarType())
12488         continue;
12489       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12490       if (!DRE)
12491         continue;
12492       NeedToCheckForLPCs.insert(DRE->getDecl());
12493     }
12494   }
12495   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12496     for (const Expr *Ref : C->varlists()) {
12497       if (!Ref->getType()->isScalarType())
12498         continue;
12499       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12500       if (!DRE)
12501         continue;
12502       NeedToCheckForLPCs.insert(DRE->getDecl());
12503     }
12504   }
12505   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12506     for (const Expr *Ref : C->varlists()) {
12507       if (!Ref->getType()->isScalarType())
12508         continue;
12509       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12510       if (!DRE)
12511         continue;
12512       NeedToCheckForLPCs.insert(DRE->getDecl());
12513     }
12514   }
12515   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12516     for (const Expr *Ref : C->varlists()) {
12517       if (!Ref->getType()->isScalarType())
12518         continue;
12519       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12520       if (!DRE)
12521         continue;
12522       NeedToCheckForLPCs.insert(DRE->getDecl());
12523     }
12524   }
12525   for (const Decl *VD : NeedToCheckForLPCs) {
12526     for (const LastprivateConditionalData &Data :
12527          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12528       if (Data.DeclToUniqueName.count(VD) > 0) {
12529         if (!Data.Disabled)
12530           NeedToAddForLPCsAsDisabled.insert(VD);
12531         break;
12532       }
12533     }
12534   }
12535 }
12536 
12537 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12538     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12539     : CGM(CGF.CGM),
12540       Action((CGM.getLangOpts().OpenMP >= 50 &&
12541               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12542                            [](const OMPLastprivateClause *C) {
12543                              return C->getKind() ==
12544                                     OMPC_LASTPRIVATE_conditional;
12545                            }))
12546                  ? ActionToDo::PushAsLastprivateConditional
12547                  : ActionToDo::DoNotPush) {
12548   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12549   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12550     return;
12551   assert(Action == ActionToDo::PushAsLastprivateConditional &&
12552          "Expected a push action.");
12553   LastprivateConditionalData &Data =
12554       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12555   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12556     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12557       continue;
12558 
12559     for (const Expr *Ref : C->varlists()) {
12560       Data.DeclToUniqueName.insert(std::make_pair(
12561           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12562           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12563     }
12564   }
12565   Data.IVLVal = IVLVal;
12566   Data.Fn = CGF.CurFn;
12567 }
12568 
12569 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12570     CodeGenFunction &CGF, const OMPExecutableDirective &S)
12571     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12572   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12573   if (CGM.getLangOpts().OpenMP < 50)
12574     return;
12575   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12576   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12577   if (!NeedToAddForLPCsAsDisabled.empty()) {
12578     Action = ActionToDo::DisableLastprivateConditional;
12579     LastprivateConditionalData &Data =
12580         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12581     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12582       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12583     Data.Fn = CGF.CurFn;
12584     Data.Disabled = true;
12585   }
12586 }
12587 
/// Creates a LastprivateConditionalRAII through the two-argument
/// constructor, i.e. the one that may push a disabled entry for \p S.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12593 
12594 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12595   if (CGM.getLangOpts().OpenMP < 50)
12596     return;
12597   if (Action == ActionToDo::DisableLastprivateConditional) {
12598     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12599            "Expected list of disabled private vars.");
12600     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12601   }
12602   if (Action == ActionToDo::PushAsLastprivateConditional) {
12603     assert(
12604         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12605         "Expected list of lastprivate conditional vars.");
12606     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12607   }
12608 }
12609 
12610 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12611                                                         const VarDecl *VD) {
12612   ASTContext &C = CGM.getContext();
12613   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
12614   if (I == LastprivateConditionalToTypes.end())
12615     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12616   QualType NewType;
12617   const FieldDecl *VDField;
12618   const FieldDecl *FiredField;
12619   LValue BaseLVal;
12620   auto VI = I->getSecond().find(VD);
12621   if (VI == I->getSecond().end()) {
12622     RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
12623     RD->startDefinition();
12624     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12625     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12626     RD->completeDefinition();
12627     NewType = C.getRecordType(RD);
12628     Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12629     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12630     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12631   } else {
12632     NewType = std::get<0>(VI->getSecond());
12633     VDField = std::get<1>(VI->getSecond());
12634     FiredField = std::get<2>(VI->getSecond());
12635     BaseLVal = std::get<3>(VI->getSecond());
12636   }
12637   LValue FiredLVal =
12638       CGF.EmitLValueForField(BaseLVal, FiredField);
12639   CGF.EmitStoreOfScalar(
12640       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12641       FiredLVal);
12642   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
12643 }
12644 
12645 namespace {
12646 /// Checks if the lastprivate conditional variable is referenced in LHS.
12647 class LastprivateConditionalRefChecker final
12648     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12649   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12650   const Expr *FoundE = nullptr;
12651   const Decl *FoundD = nullptr;
12652   StringRef UniqueDeclName;
12653   LValue IVLVal;
12654   llvm::Function *FoundFn = nullptr;
12655   SourceLocation Loc;
12656 
12657 public:
12658   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12659     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12660          llvm::reverse(LPM)) {
12661       auto It = D.DeclToUniqueName.find(E->getDecl());
12662       if (It == D.DeclToUniqueName.end())
12663         continue;
12664       if (D.Disabled)
12665         return false;
12666       FoundE = E;
12667       FoundD = E->getDecl()->getCanonicalDecl();
12668       UniqueDeclName = It->second;
12669       IVLVal = D.IVLVal;
12670       FoundFn = D.Fn;
12671       break;
12672     }
12673     return FoundE == E;
12674   }
12675   bool VisitMemberExpr(const MemberExpr *E) {
12676     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12677       return false;
12678     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12679          llvm::reverse(LPM)) {
12680       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12681       if (It == D.DeclToUniqueName.end())
12682         continue;
12683       if (D.Disabled)
12684         return false;
12685       FoundE = E;
12686       FoundD = E->getMemberDecl()->getCanonicalDecl();
12687       UniqueDeclName = It->second;
12688       IVLVal = D.IVLVal;
12689       FoundFn = D.Fn;
12690       break;
12691     }
12692     return FoundE == E;
12693   }
12694   bool VisitStmt(const Stmt *S) {
12695     for (const Stmt *Child : S->children()) {
12696       if (!Child)
12697         continue;
12698       if (const auto *E = dyn_cast<Expr>(Child))
12699         if (!E->isGLValue())
12700           continue;
12701       if (Visit(Child))
12702         return true;
12703     }
12704     return false;
12705   }
/// Creates a checker over the lastprivate conditional data \p LPM; the
/// argument is stored in the member of the same name and is the data the
/// Visit* methods search.
12706   explicit LastprivateConditionalRefChecker(
12707       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12708       : LPM(LPM) {}
/// Returns the data recorded by the matching visit as the tuple
/// (FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn). The pointer members are
/// still null if no visit has recorded a match.
12709   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12710   getFoundData() const {
12711     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12712   }
12713 };
12714 } // namespace
12715 
12716 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
12717                                                        LValue IVLVal,
12718                                                        StringRef UniqueDeclName,
12719                                                        LValue LVal,
12720                                                        SourceLocation Loc) {
12721   // Last updated loop counter for the lastprivate conditional var.
12722   // int<xx> last_iv = 0;
12723   llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
12724   llvm::Constant *LastIV =
12725       getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
12726   cast<llvm::GlobalVariable>(LastIV)->setAlignment(
12727       IVLVal.getAlignment().getAsAlign());
12728   LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
12729 
12730   // Last value of the lastprivate conditional.
12731   // decltype(priv_a) last_a;
12732   llvm::GlobalVariable *Last = getOrCreateInternalVariable(
12733       CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
12734   Last->setAlignment(LVal.getAlignment().getAsAlign());
12735   LValue LastLVal = CGF.MakeAddrLValue(
12736       Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());
12737 
12738   // Global loop counter. Required to handle inner parallel-for regions.
12739   // iv
12740   llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
12741 
12742   // #pragma omp critical(a)
12743   // if (last_iv <= iv) {
12744   //   last_iv = iv;
12745   //   last_a = priv_a;
12746   // }
12747   auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
12748                     Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
12749     Action.Enter(CGF);
12750     llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
12751     // (last_iv <= iv) ? Check if the variable is updated and store new
12752     // value in global var.
12753     llvm::Value *CmpRes;
12754     if (IVLVal.getType()->isSignedIntegerType()) {
12755       CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
12756     } else {
12757       assert(IVLVal.getType()->isUnsignedIntegerType() &&
12758              "Loop iteration variable must be integer.");
12759       CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
12760     }
12761     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
12762     llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
12763     CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
12764     // {
12765     CGF.EmitBlock(ThenBB);
12766 
12767     //   last_iv = iv;
12768     CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
12769 
12770     //   last_a = priv_a;
12771     switch (CGF.getEvaluationKind(LVal.getType())) {
12772     case TEK_Scalar: {
12773       llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
12774       CGF.EmitStoreOfScalar(PrivVal, LastLVal);
12775       break;
12776     }
12777     case TEK_Complex: {
12778       CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
12779       CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
12780       break;
12781     }
12782     case TEK_Aggregate:
12783       llvm_unreachable(
12784           "Aggregates are not supported in lastprivate conditional.");
12785     }
12786     // }
12787     CGF.EmitBranch(ExitBB);
12788     // There is no need to emit line number for unconditional branch.
12789     (void)ApplyDebugLocation::CreateEmpty(CGF);
12790     CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
12791   };
12792 
12793   if (CGM.getLangOpts().OpenMPSimd) {
12794     // Do not emit as a critical region as no parallel region could be emitted.
12795     RegionCodeGenTy ThenRCG(CodeGen);
12796     ThenRCG(CGF);
12797   } else {
12798     emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
12799   }
12800 }
12801 
12802 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12803                                                          const Expr *LHS) {
12804   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12805     return;
12806   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12807   if (!Checker.Visit(LHS))
12808     return;
12809   const Expr *FoundE;
12810   const Decl *FoundD;
12811   StringRef UniqueDeclName;
12812   LValue IVLVal;
12813   llvm::Function *FoundFn;
12814   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12815       Checker.getFoundData();
12816   if (FoundFn != CGF.CurFn) {
12817     // Special codegen for inner parallel regions.
12818     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12819     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12820     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12821            "Lastprivate conditional is not found in outer region.");
12822     QualType StructTy = std::get<0>(It->getSecond());
12823     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12824     LValue PrivLVal = CGF.EmitLValue(FoundE);
12825     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12826         PrivLVal.getAddress(CGF),
12827         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
12828         CGF.ConvertTypeForMem(StructTy));
12829     LValue BaseLVal =
12830         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12831     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12832     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12833                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12834                         FiredLVal, llvm::AtomicOrdering::Unordered,
12835                         /*IsVolatile=*/true, /*isInit=*/false);
12836     return;
12837   }
12838 
12839   // Private address of the lastprivate conditional in the current context.
12840   // priv_a
12841   LValue LVal = CGF.EmitLValue(FoundE);
12842   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12843                                    FoundE->getExprLoc());
12844 }
12845 
12846 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12847     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12848     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12849   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12850     return;
12851   auto Range = llvm::reverse(LastprivateConditionalStack);
12852   auto It = llvm::find_if(
12853       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12854   if (It == Range.end() || It->Fn != CGF.CurFn)
12855     return;
12856   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12857   assert(LPCI != LastprivateConditionalToTypes.end() &&
12858          "Lastprivates must be registered already.");
12859   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12860   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12861   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12862   for (const auto &Pair : It->DeclToUniqueName) {
12863     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12864     if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
12865       continue;
12866     auto I = LPCI->getSecond().find(Pair.first);
12867     assert(I != LPCI->getSecond().end() &&
12868            "Lastprivate must be rehistered already.");
12869     // bool Cmp = priv_a.Fired != 0;
12870     LValue BaseLVal = std::get<3>(I->getSecond());
12871     LValue FiredLVal =
12872         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12873     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12874     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12875     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12876     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12877     // if (Cmp) {
12878     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12879     CGF.EmitBlock(ThenBB);
12880     Address Addr = CGF.GetAddrOfLocalVar(VD);
12881     LValue LVal;
12882     if (VD->getType()->isReferenceType())
12883       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12884                                            AlignmentSource::Decl);
12885     else
12886       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12887                                 AlignmentSource::Decl);
12888     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12889                                      D.getBeginLoc());
12890     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12891     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12892     // }
12893   }
12894 }
12895 
12896 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12897     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12898     SourceLocation Loc) {
12899   if (CGF.getLangOpts().OpenMP < 50)
12900     return;
12901   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12902   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12903          "Unknown lastprivate conditional variable.");
12904   StringRef UniqueName = It->second;
12905   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12906   // The variable was not updated in the region - exit.
12907   if (!GV)
12908     return;
12909   LValue LPLVal = CGF.MakeAddrLValue(
12910       Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
12911       PrivLVal.getType().getNonReferenceType());
12912   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12913   CGF.EmitStoreOfScalar(Res, PrivLVal);
12914 }
12915 
/// SIMD-only mode stub: emitParallelOutlinedFunction is not supported here
/// and is reported via llvm_unreachable if it is ever reached.
12916 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
12917     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12918     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
12919   llvm_unreachable("Not supported in SIMD-only mode");
12920 }
12921 
/// SIMD-only mode stub: emitTeamsOutlinedFunction is not supported here and
/// is reported via llvm_unreachable if it is ever reached.
12922 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
12923     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12924     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
12925   llvm_unreachable("Not supported in SIMD-only mode");
12926 }
12927 
/// SIMD-only mode stub: emitTaskOutlinedFunction is not supported here and
/// is reported via llvm_unreachable if it is ever reached.
12928 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
12929     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12930     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
12931     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
12932     bool Tied, unsigned &NumberOfParts) {
12933   llvm_unreachable("Not supported in SIMD-only mode");
12934 }
12935 
/// SIMD-only mode stub: emitParallelCall is not supported here and is
/// reported via llvm_unreachable if it is ever reached.
12936 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
12937                                            SourceLocation Loc,
12938                                            llvm::Function *OutlinedFn,
12939                                            ArrayRef<llvm::Value *> CapturedVars,
12940                                            const Expr *IfCond,
12941                                            llvm::Value *NumThreads) {
12942   llvm_unreachable("Not supported in SIMD-only mode");
12943 }
12944 
/// SIMD-only mode stub: emitCriticalRegion is not supported here and is
/// reported via llvm_unreachable if it is ever reached.
12945 void CGOpenMPSIMDRuntime::emitCriticalRegion(
12946     CodeGenFunction &CGF, StringRef CriticalName,
12947     const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
12948     const Expr *Hint) {
12949   llvm_unreachable("Not supported in SIMD-only mode");
12950 }
12951 
/// SIMD-only mode stub: emitMasterRegion is not supported here and is
/// reported via llvm_unreachable if it is ever reached.
12952 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
12953                                            const RegionCodeGenTy &MasterOpGen,
12954                                            SourceLocation Loc) {
12955   llvm_unreachable("Not supported in SIMD-only mode");
12956 }
12957 
/// SIMD-only mode stub: emitMaskedRegion is not supported here and is
/// reported via llvm_unreachable if it is ever reached.
12958 void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
12959                                            const RegionCodeGenTy &MasterOpGen,
12960                                            SourceLocation Loc,
12961                                            const Expr *Filter) {
12962   llvm_unreachable("Not supported in SIMD-only mode");
12963 }
12964 
/// SIMD-only mode stub: emitTaskyieldCall is not supported here and is
/// reported via llvm_unreachable if it is ever reached.
12965 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
12966                                             SourceLocation Loc) {
12967   llvm_unreachable("Not supported in SIMD-only mode");
12968 }
12969 
/// SIMD-only mode stub: emitTaskgroupRegion is not supported here and is
/// reported via llvm_unreachable if it is ever reached.
12970 void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
12971     CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
12972     SourceLocation Loc) {
12973   llvm_unreachable("Not supported in SIMD-only mode");
12974 }
12975 
/// SIMD-only mode stub: emitSingleRegion is not supported here and is
/// reported via llvm_unreachable if it is ever reached.
12976 void CGOpenMPSIMDRuntime::emitSingleRegion(
12977     CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
12978     SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
12979     ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
12980     ArrayRef<const Expr *> AssignmentOps) {
12981   llvm_unreachable("Not supported in SIMD-only mode");
12982 }
12983 
/// SIMD-only mode stub: emitOrderedRegion is not supported here and is
/// reported via llvm_unreachable if it is ever reached.
12984 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
12985                                             const RegionCodeGenTy &OrderedOpGen,
12986                                             SourceLocation Loc,
12987                                             bool IsThreads) {
12988   llvm_unreachable("Not supported in SIMD-only mode");
12989 }
12990 
/// SIMD-only mode stub: emitBarrierCall is not supported here and is
/// reported via llvm_unreachable if it is ever reached.
12991 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
12992                                           SourceLocation Loc,
12993                                           OpenMPDirectiveKind Kind,
12994                                           bool EmitChecks,
12995                                           bool ForceSimpleCall) {
12996   llvm_unreachable("Not supported in SIMD-only mode");
12997 }
12998 
/// SIMD-only mode stub: emitForDispatchInit is not supported here and is
/// reported via llvm_unreachable if it is ever reached.
12999 void CGOpenMPSIMDRuntime::emitForDispatchInit(
13000     CodeGenFunction &CGF, SourceLocation Loc,
13001     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
13002     bool Ordered, const DispatchRTInput &DispatchValues) {
13003   llvm_unreachable("Not supported in SIMD-only mode");
13004 }
13005 
/// SIMD-only mode stub: emitForStaticInit is not supported here and is
/// reported via llvm_unreachable if it is ever reached.
13006 void CGOpenMPSIMDRuntime::emitForStaticInit(
13007     CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
13008     const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
13009   llvm_unreachable("Not supported in SIMD-only mode");
13010 }
13011 
/// SIMD-only mode stub: emitDistributeStaticInit is not supported here and
/// is reported via llvm_unreachable if it is ever reached.
13012 void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
13013     CodeGenFunction &CGF, SourceLocation Loc,
13014     OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
13015   llvm_unreachable("Not supported in SIMD-only mode");
13016 }
13017 
/// SIMD-only mode stub: emitForOrderedIterationEnd is not supported here and
/// is reported via llvm_unreachable if it is ever reached.
13018 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
13019                                                      SourceLocation Loc,
13020                                                      unsigned IVSize,
13021                                                      bool IVSigned) {
13022   llvm_unreachable("Not supported in SIMD-only mode");
13023 }
13024 
/// SIMD-only mode stub: emitForStaticFinish is not supported here and is
/// reported via llvm_unreachable if it is ever reached.
13025 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
13026                                               SourceLocation Loc,
13027                                               OpenMPDirectiveKind DKind) {
13028   llvm_unreachable("Not supported in SIMD-only mode");
13029 }
13030 
/// SIMD-only mode stub: emitForNext is not supported here and is reported
/// via llvm_unreachable if it is ever reached.
13031 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
13032                                               SourceLocation Loc,
13033                                               unsigned IVSize, bool IVSigned,
13034                                               Address IL, Address LB,
13035                                               Address UB, Address ST) {
13036   llvm_unreachable("Not supported in SIMD-only mode");
13037 }
13038 
/// SIMD-only mode stub: emitNumThreadsClause is not supported here and is
/// reported via llvm_unreachable if it is ever reached.
13039 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
13040                                                llvm::Value *NumThreads,
13041                                                SourceLocation Loc) {
13042   llvm_unreachable("Not supported in SIMD-only mode");
13043 }
13044 
/// SIMD-only mode stub: emitProcBindClause is not supported here and is
/// reported via llvm_unreachable if it is ever reached.
13045 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
13046                                              ProcBindKind ProcBind,
13047                                              SourceLocation Loc) {
13048   llvm_unreachable("Not supported in SIMD-only mode");
13049 }
13050 
/// SIMD-only mode stub: getAddrOfThreadPrivate is not supported here and is
/// reported via llvm_unreachable if it is ever reached.
13051 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
13052                                                     const VarDecl *VD,
13053                                                     Address VDAddr,
13054                                                     SourceLocation Loc) {
13055   llvm_unreachable("Not supported in SIMD-only mode");
13056 }
13057 
/// SIMD-only mode stub: emitThreadPrivateVarDefinition is not supported here
/// and is reported via llvm_unreachable if it is ever reached.
13058 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
13059     const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
13060     CodeGenFunction *CGF) {
13061   llvm_unreachable("Not supported in SIMD-only mode");
13062 }
13063 
/// SIMD-only mode stub: getAddrOfArtificialThreadPrivate is not supported
/// here and is reported via llvm_unreachable if it is ever reached.
13064 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
13065     CodeGenFunction &CGF, QualType VarType, StringRef Name) {
13066   llvm_unreachable("Not supported in SIMD-only mode");
13067 }
13068 
/// SIMD-only mode stub: emitFlush is not supported here and is reported via
/// llvm_unreachable if it is ever reached.
13069 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
13070                                     ArrayRef<const Expr *> Vars,
13071                                     SourceLocation Loc,
13072                                     llvm::AtomicOrdering AO) {
13073   llvm_unreachable("Not supported in SIMD-only mode");
13074 }
13075 
/// SIMD-only mode stub: emitTaskCall is not supported here and is reported
/// via llvm_unreachable if it is ever reached.
13076 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
13077                                        const OMPExecutableDirective &D,
13078                                        llvm::Function *TaskFunction,
13079                                        QualType SharedsTy, Address Shareds,
13080                                        const Expr *IfCond,
13081                                        const OMPTaskDataTy &Data) {
13082   llvm_unreachable("Not supported in SIMD-only mode");
13083 }
13084 
/// SIMD-only mode stub: emitTaskLoopCall is not supported here and is
/// reported via llvm_unreachable if it is ever reached.
13085 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
13086     CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
13087     llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
13088     const Expr *IfCond, const OMPTaskDataTy &Data) {
13089   llvm_unreachable("Not supported in SIMD-only mode");
13090 }
13091 
/// Reduction codegen for SIMD-only mode. Only requests with
/// Options.SimpleReduction set are expected (asserted); the work itself is
/// forwarded unchanged to the base-class CGOpenMPRuntime::emitReduction.
13092 void CGOpenMPSIMDRuntime::emitReduction(
13093     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
13094     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
13095     ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
13096   assert(Options.SimpleReduction && "Only simple reduction is expected.");
13097   CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
13098                                  ReductionOps, Options);
13099 }
13100 
/// SIMD-only mode stub: emitTaskReductionInit is not supported here and is
/// reported via llvm_unreachable if it is ever reached.
13101 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
13102     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
13103     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
13104   llvm_unreachable("Not supported in SIMD-only mode");
13105 }
13106 
/// SIMD-only mode stub: emitTaskReductionFini is not supported here and is
/// reported via llvm_unreachable if it is ever reached.
13107 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
13108                                                 SourceLocation Loc,
13109                                                 bool IsWorksharingReduction) {
13110   llvm_unreachable("Not supported in SIMD-only mode");
13111 }
13112 
/// SIMD-only mode stub: emitTaskReductionFixups is not supported here and is
/// reported via llvm_unreachable if it is ever reached.
13113 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
13114                                                   SourceLocation Loc,
13115                                                   ReductionCodeGen &RCG,
13116                                                   unsigned N) {
13117   llvm_unreachable("Not supported in SIMD-only mode");
13118 }
13119 
/// SIMD-only mode stub: getTaskReductionItem is not supported here and is
/// reported via llvm_unreachable if it is ever reached.
13120 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
13121                                                   SourceLocation Loc,
13122                                                   llvm::Value *ReductionsPtr,
13123                                                   LValue SharedLVal) {
13124   llvm_unreachable("Not supported in SIMD-only mode");
13125 }
13126 
/// SIMD-only mode stub: emitTaskwaitCall is not supported here and is
/// reported via llvm_unreachable if it is ever reached.
13127 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
13128                                            SourceLocation Loc,
13129                                            const OMPTaskDataTy &Data) {
13130   llvm_unreachable("Not supported in SIMD-only mode");
13131 }
13132 
/// SIMD-only mode stub: emitCancellationPointCall is not supported here and
/// is reported via llvm_unreachable if it is ever reached.
13133 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
13134     CodeGenFunction &CGF, SourceLocation Loc,
13135     OpenMPDirectiveKind CancelRegion) {
13136   llvm_unreachable("Not supported in SIMD-only mode");
13137 }
13138 
/// SIMD-only mode stub: emitCancelCall is not supported here and is reported
/// via llvm_unreachable if it is ever reached.
13139 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
13140                                          SourceLocation Loc, const Expr *IfCond,
13141                                          OpenMPDirectiveKind CancelRegion) {
13142   llvm_unreachable("Not supported in SIMD-only mode");
13143 }
13144 
/// SIMD-only mode stub: emitTargetOutlinedFunction is not supported here and
/// is reported via llvm_unreachable if it is ever reached.
13145 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
13146     const OMPExecutableDirective &D, StringRef ParentName,
13147     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
13148     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
13149   llvm_unreachable("Not supported in SIMD-only mode");
13150 }
13151 
/// SIMD-only mode stub: emitTargetCall is not supported here and is reported
/// via llvm_unreachable if it is ever reached.
13152 void CGOpenMPSIMDRuntime::emitTargetCall(
13153     CodeGenFunction &CGF, const OMPExecutableDirective &D,
13154     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
13155     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
13156     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
13157                                      const OMPLoopDirective &D)>
13158         SizeEmitter) {
13159   llvm_unreachable("Not supported in SIMD-only mode");
13160 }
13161 
/// SIMD-only mode stub: emitTargetFunctions is not supported here and is
/// reported via llvm_unreachable if it is ever reached.
13162 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
13163   llvm_unreachable("Not supported in SIMD-only mode");
13164 }
13165 
/// SIMD-only mode stub: emitTargetGlobalVariable is not supported here and
/// is reported via llvm_unreachable if it is ever reached.
13166 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
13167   llvm_unreachable("Not supported in SIMD-only mode");
13168 }
13169 
/// Always returns false without emitting anything for \p GD.
/// NOTE(review): presumably "false" tells the caller the declaration was not
/// handled as a target global - confirm against the base-class contract.
13170 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
13171   return false;
13172 }
13173 
/// SIMD-only mode stub: emitTeamsCall is not supported here and is reported
/// via llvm_unreachable if it is ever reached.
13174 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
13175                                         const OMPExecutableDirective &D,
13176                                         SourceLocation Loc,
13177                                         llvm::Function *OutlinedFn,
13178                                         ArrayRef<llvm::Value *> CapturedVars) {
13179   llvm_unreachable("Not supported in SIMD-only mode");
13180 }
13181 
/// SIMD-only mode stub: emitNumTeamsClause is not supported here and is
/// reported via llvm_unreachable if it is ever reached.
13182 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
13183                                              const Expr *NumTeams,
13184                                              const Expr *ThreadLimit,
13185                                              SourceLocation Loc) {
13186   llvm_unreachable("Not supported in SIMD-only mode");
13187 }
13188 
/// SIMD-only mode stub: emitTargetDataCalls is not supported here and is
/// reported via llvm_unreachable if it is ever reached.
13189 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
13190     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
13191     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
13192   llvm_unreachable("Not supported in SIMD-only mode");
13193 }
13194 
/// SIMD-only mode stub: emitTargetDataStandAloneCall is not supported here
/// and is reported via llvm_unreachable if it is ever reached.
13195 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
13196     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
13197     const Expr *Device) {
13198   llvm_unreachable("Not supported in SIMD-only mode");
13199 }
13200 
/// SIMD-only mode stub: emitDoacrossInit is not supported here and is
/// reported via llvm_unreachable if it is ever reached.
13201 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
13202                                            const OMPLoopDirective &D,
13203                                            ArrayRef<Expr *> NumIterations) {
13204   llvm_unreachable("Not supported in SIMD-only mode");
13205 }
13206 
/// SIMD-only mode stub: emitDoacrossOrdered is not supported here and is
/// reported via llvm_unreachable if it is ever reached.
13207 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
13208                                               const OMPDependClause *C) {
13209   llvm_unreachable("Not supported in SIMD-only mode");
13210 }
13211 
/// SIMD-only mode stub: translateParameter is not supported here and is
/// reported via llvm_unreachable if it is ever reached.
13212 const VarDecl *
13213 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
13214                                         const VarDecl *NativeParam) const {
13215   llvm_unreachable("Not supported in SIMD-only mode");
13216 }
13217 
/// SIMD-only mode stub: getParameterAddress is not supported here and is
/// reported via llvm_unreachable if it is ever reached.
13218 Address
13219 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
13220                                          const VarDecl *NativeParam,
13221                                          const VarDecl *TargetParam) const {
13222   llvm_unreachable("Not supported in SIMD-only mode");
13223 }
13224