1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/APValue.h"
20 #include "clang/AST/Attr.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/AST/OpenMPClause.h"
23 #include "clang/AST/StmtOpenMP.h"
24 #include "clang/AST/StmtVisitor.h"
25 #include "clang/Basic/BitmaskEnum.h"
26 #include "clang/Basic/FileManager.h"
27 #include "clang/Basic/OpenMPKinds.h"
28 #include "clang/Basic/SourceManager.h"
29 #include "clang/CodeGen/ConstantInitBuilder.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/SetOperations.h"
32 #include "llvm/ADT/SmallBitVector.h"
33 #include "llvm/ADT/StringExtras.h"
34 #include "llvm/Bitcode/BitcodeReader.h"
35 #include "llvm/IR/Constants.h"
36 #include "llvm/IR/DerivedTypes.h"
37 #include "llvm/IR/GlobalValue.h"
38 #include "llvm/IR/Value.h"
39 #include "llvm/Support/AtomicOrdering.h"
40 #include "llvm/Support/Format.h"
41 #include "llvm/Support/raw_ostream.h"
42 #include <cassert>
43 #include <numeric>
44 
45 using namespace clang;
46 using namespace CodeGen;
47 using namespace llvm::omp;
48 
49 namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Build region info for an outlined region backed by a captured statement.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Build region info for a region with no captured statement of its own
  /// (e.g. an inlined region).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a switching point for untied tasks; no-op by default, overridden
  /// for task regions.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// True if the region may be exited via an OpenMP 'cancel' construct.
  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI support (isa/dyn_cast).
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Kind of this region (outlined parallel/task, inlined, or target).
  CGOpenMPRegionKind RegionKind;
  /// Callback that emits the body of the region.
  RegionCodeGenTy CodeGen;
  /// OpenMP directive that introduced this region.
  OpenMPDirectiveKind Kind;
  /// True if a 'cancel' directive may appear inside the region.
  bool HasCancel;
};
111 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param ThreadIDVar Parameter of the outlined function that carries the
  /// global thread id (must not be null).
  /// \param HelperName Name to use for the generated outlined helper.
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// RTTI support: matches only parallel outlined regions.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name of the generated outlined helper function.
  StringRef HelperName;
};
144 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the task-switching machinery for 'untied'
  /// tasks: a switch on the task part id that lets an untied task resume at
  /// the point where it previously yielded.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True for untied tasks (note: constructed from !Tied).
    bool Untied;
    /// Captured variable holding the current part id of the task.
    const VarDecl *PartIDVar;
    /// Codegen sequence emitted at each switching point.
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch over the part id; one case is added per switching point.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // Default destination: nothing left to do, return from the task.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Part id 0 resumes at the beginning of the task body.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one switching point: store the next part id (the next unused case
    /// index), run the user-provided codegen, return from the task, and
    /// register the continuation block as a new case of the switch.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts (= switch cases) generated so far.
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Delegate to the untied-task action to emit a switching point.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  /// RTTI support: matches only task outlined regions.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
233 
/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries delegate to the enclosing OpenMP region
/// (OuterRegionInfo), when one exists.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  /// \param OldCSI Captured-statement info that was active before this
  /// inlined region was entered; restored by the caller afterwards.
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region,no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// Return the captured-statement info that was active before this region.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  /// RTTI support: matches only inlined regions.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to an OpenMP region info, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
316 
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of
/// the captured fields. The name of the target region has to be unique in a
/// given application so it is provided by the client, because only the client
/// has the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// RTTI support: matches only target regions.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Client-provided, application-unique name of the target region.
  StringRef HelperName;
};
345 
/// Placeholder RegionCodeGenTy callback for expression-only regions; must
/// never actually be invoked.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in a innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      // Skip locals/parameters: only non-local variables need privatizing.
      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      // Build a reference to the captured variable and record its address
      // in the private scope.
      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  /// Never matched by isa/dyn_cast; instances are used only locally.
  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
407 
/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  /// Stashed lambda capture map; swapped back on destruction.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  /// Stashed lambda 'this' capture field.
  FieldDecl *LambdaThisCaptureField = nullptr;
  /// Stashed block info.
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  /// If true, lambda/block capture state is hidden from the inlined region
  /// for the lifetime of this RAII object.
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      // Stash lambda/block capture state so the inlined region does not see
      // it; restored in the destructor.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      // Put the stashed lambda/block capture state back.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
450 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  /// (0xC0 includes the OMP_IDENT_BARRIER_IMPL bit 0x40.)
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  /// (0x140 includes the OMP_IDENT_BARRIER_IMPL bit 0x40.)
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
479 
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Device IDs with reserved meanings for the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
505 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
546 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// These are high-order modifier bits, distinct from the schedule kind
  /// values above.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
578 
579 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
580 /// region.
581 class CleanupTy final : public EHScopeStack::Cleanup {
582   PrePostActionTy *Action;
583 
584 public:
585   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
586   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
587     if (!CGF.HaveInsertPoint())
588       return;
589     Action->Exit(CGF);
590   }
591 };
592 
593 } // anonymous namespace
594 
595 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
596   CodeGenFunction::RunCleanupsScope Scope(CGF);
597   if (PrePostAction) {
598     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
599     Callback(CodeGen, CGF, *PrePostAction);
600   } else {
601     PrePostActionTy Action;
602     Callback(CodeGen, CGF, Action);
603   }
604 }
605 
606 /// Check if the combiner is a call to UDR combiner and if it is so return the
607 /// UDR decl used for reduction.
608 static const OMPDeclareReductionDecl *
609 getReductionInit(const Expr *ReductionOp) {
610   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
611     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
612       if (const auto *DRE =
613               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
614         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
615           return DRD;
616   return nullptr;
617 }
618 
/// Initialize a privatized reduction copy.
/// If \p DRD declares an initializer, the UDR initializer call \p InitOp is
/// emitted with its two arguments remapped to \p Private and \p Original;
/// otherwise the private copy is initialized from a zero-initialized global
/// constant of type \p Ty.
/// \param DRD User-defined reduction declaration (must not be null).
/// \param InitOp UDR initializer call expression.
/// \param Private Address of the private copy being initialized.
/// \param Original Address of the original (shared) variable.
/// \param Ty Type of the item being initialized.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Remap the initializer's two variables onto the private/original
    // addresses, then emit the call with the actual initializer function.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No UDR initializer: copy from a private zero-initialized global.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      // Aggregates are emitted straight from the global's lvalue; no scalar
      // rvalue load is possible.
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
672 
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, each element is initialized via
/// the user-defined reduction initializer; otherwise \p Init is emitted as a
/// plain initializer expression.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction declaration, if any; when present the source
/// array is walked in lockstep with the destination.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI over the source element pointer; only needed when a UDR initializer
  // reads the original array.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Scope the per-element cleanups to a single iteration.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
762 
/// Emits the lvalue of the shared copy of reduction item \p E by delegating
/// to the generic OpenMP shared-lvalue emission.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
766 
767 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
768                                             const Expr *E) {
769   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
770     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
771   return LValue();
772 }
773 
/// Emits the initialization of the private copy of array reduction item \p N
/// at \p PrivateAddr, using either the 'declare reduction' initializer of
/// \p DRD or the private variable's own initializer.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Use the user-defined reduction initializer when one exists, or when the
  // private copy has no initializer of its own.
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}
790 
791 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
792                                    ArrayRef<const Expr *> Origs,
793                                    ArrayRef<const Expr *> Privates,
794                                    ArrayRef<const Expr *> ReductionOps) {
795   ClausesData.reserve(Shareds.size());
796   SharedAddresses.reserve(Shareds.size());
797   Sizes.reserve(Shareds.size());
798   BaseDecls.reserve(Shareds.size());
799   const auto *IOrig = Origs.begin();
800   const auto *IPriv = Privates.begin();
801   const auto *IRed = ReductionOps.begin();
802   for (const Expr *Ref : Shareds) {
803     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
804     std::advance(IOrig, 1);
805     std::advance(IPriv, 1);
806     std::advance(IRed, 1);
807   }
808 }
809 
810 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
811   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
812          "Number of generated lvalues must be exactly N.");
813   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
814   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
815   SharedAddresses.emplace_back(First, Second);
816   if (ClausesData[N].Shared == ClausesData[N].Ref) {
817     OrigAddresses.emplace_back(First, Second);
818   } else {
819     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
820     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
821     OrigAddresses.emplace_back(First, Second);
822   }
823 }
824 
/// Computes the size of reduction item \p N (in bytes and, for variably
/// modified types, in elements) and, for VLAs, binds the size expression of
/// the private type to the computed element count before emitting it.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-sized item: byte size is known from the type; no separate
    // element count is required.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (UB - LB) + 1; byte size = count * sizeof(element).
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Byte size comes from the type; element count = bytes / sizeof(element).
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Map the VLA size expression of the private type to the computed element
  // count while the variably modified type is emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
860 
/// Re-emits the variably modified private type of reduction item \p N using an
/// externally supplied element count \p Size. For non-VLA items \p Size must
/// be null and nothing needs to be done.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    // Non-VLA items had a null element count recorded by the other overload.
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind the VLA size expression to \p Size while emitting the type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
879 
/// Emits the initialization of the private copy of reduction item \p N at
/// \p PrivateAddr. Array types are initialized elementwise; items with a
/// usable 'declare reduction' initializer use it; otherwise the private
/// variable's own non-trivial initializer is emitted unless \p DefaultInit
/// reports that it already handled the initialization.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Retype the raw private storage to the memory type of the private copy.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar item with a user-defined reduction initializer (or no own init).
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private variable's own initializer when DefaultInit
    // did not take care of it and the initializer actually does something.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
907 
908 bool ReductionCodeGen::needCleanups(unsigned N) {
909   const auto *PrivateVD =
910       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
911   QualType PrivateType = PrivateVD->getType();
912   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
913   return DTorKind != QualType::DK_none;
914 }
915 
916 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
917                                     Address PrivateAddr) {
918   const auto *PrivateVD =
919       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
920   QualType PrivateType = PrivateVD->getType();
921   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
922   if (needCleanups(N)) {
923     PrivateAddr = CGF.Builder.CreateElementBitCast(
924         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
925     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
926   }
927 }
928 
/// Walks through the pointer/reference indirections of \p BaseLV (whose type
/// is \p BaseTy) until the pointee type matches \p ElTy, loading through each
/// level, and returns the resulting lvalue retyped to the memory type of
/// \p ElTy.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // Load through one level of pointer or reference indirection.
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  // Cast the final address to the element memory type, preserving the base
  // info and TBAA metadata of the last loaded lvalue.
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
948 
/// Builds a chain of temporaries mirroring the pointer/reference indirections
/// between \p BaseTy and \p ElTy, stores \p Addr at the innermost level, and
/// returns the outermost temporary. If no indirection is needed, returns
/// \p Addr cast to \p BaseLVType with alignment \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();         // innermost temporary so far
  Address TopTmp = Address::invalid();      // temporary created in prev. step
  Address MostTopTmp = Address::invalid();  // outermost temporary (returned)
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // One temporary per indirection level; each outer temporary points at the
    // next inner one.
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the actual address into the innermost temporary and hand back the
    // outermost one so callers can load through the chain.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address::deprecated(Addr, BaseLVAlignment);
}
976 
977 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
978   const VarDecl *OrigVD = nullptr;
979   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
980     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
981     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
982       Base = TempOASE->getBase()->IgnoreParenImpCasts();
983     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
984       Base = TempASE->getBase()->IgnoreParenImpCasts();
985     DE = cast<DeclRefExpr>(Base);
986     OrigVD = cast<VarDecl>(DE->getDecl());
987   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
988     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
989     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
990       Base = TempASE->getBase()->IgnoreParenImpCasts();
991     DE = cast<DeclRefExpr>(Base);
992     OrigVD = cast<VarDecl>(DE->getDecl());
993   }
994   return OrigVD;
995 }
996 
/// Adjusts \p PrivateAddr for reduction items that are array sections or
/// subscripts: computes the element offset of the shared item within its base
/// variable and applies the same offset to the private copy, rebuilding any
/// pointer indirections via castToBase. For plain items the address is
/// returned unchanged.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    // Load through any pointer/reference levels of the base variable down to
    // the element type of the shared item.
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    // Offset (in elements) of the shared item from its base.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    // Apply the same offset to the private copy.
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1024 
1025 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1026   const OMPDeclareReductionDecl *DRD =
1027       getReductionInit(ClausesData[N].ReductionOp);
1028   return DRD && DRD->getInitializer();
1029 }
1030 
/// The thread id variable of an outlined region is a pointer; load through it
/// to form the lvalue of the thread id value itself.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}
1036 
/// Emits the body of an OpenMP region inside a terminate scope: an exception
/// escaping the structured block would violate its single-entry/single-exit
/// contract, so it terminates instead of unwinding past the region.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1051 
/// For task outlined regions the thread id variable holds the value directly
/// (no pointer indirection), so its lvalue is just its local address.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}
1058 
1059 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1060                                        QualType FieldTy) {
1061   auto *Field = FieldDecl::Create(
1062       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1063       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1064       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1065   Field->setAccess(AS_public);
1066   DC->addDecl(Field);
1067   return Field;
1068 }
1069 
/// Sets up the OpenMP runtime helper: records the name separators, creates the
/// OpenMPIRBuilder over the module, and loads any offload metadata.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // Type of kmp critical names: an array of 8 i32 values.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}
1080 
1081 void CGOpenMPRuntime::clear() {
1082   InternalVars.clear();
1083   // Clean non-target variable declarations possibly used only in debug info.
1084   for (const auto &Data : EmittedNonTargetVariables) {
1085     if (!Data.getValue().pointsToAliveValue())
1086       continue;
1087     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1088     if (!GV)
1089       continue;
1090     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1091       continue;
1092     GV->eraseFromParent();
1093   }
1094 }
1095 
1096 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1097   SmallString<128> Buffer;
1098   llvm::raw_svector_ostream OS(Buffer);
1099   StringRef Sep = FirstSeparator;
1100   for (StringRef Part : Parts) {
1101     OS << Sep << Part;
1102     Sep = Separator;
1103   }
1104   return std::string(OS.str());
1105 }
1106 
/// Emits the outlined function for a 'declare reduction' combiner or
/// initializer: "void .omp_combiner.(Ty *omp_out, Ty *omp_in)" (or
/// ".omp_initializer." respectively). The declared \p In and \p Out variables
/// are privatized to the loaded parameter addresses so the combiner/
/// initializer expression can refer to them directly.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // These helpers are tiny; force inlining in optimized builds.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress(CGF));
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress(CGF));
  (void)Scope.Privatize();
  // For initializers without an explicit init expression, emit omp_priv's own
  // non-trivial initializer, if any.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1161 
/// Emits (once per declaration) the combiner and, if present, the initializer
/// functions for the user-defined reduction \p D and caches them in UDRMap.
/// When called inside a function, the declaration is also recorded against
/// that function for later cleanup.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only call-style initializers pass the init expression through; direct
    // initializers are emitted from the priv variable's own initializer.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1187 
1188 std::pair<llvm::Function *, llvm::Function *>
1189 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1190   auto I = UDRMap.find(D);
1191   if (I != UDRMap.end())
1192     return I->second;
1193   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1194   return UDRMap.lookup(D);
1195 }
1196 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  /// Pushes a finalization callback for directive \p Kind onto \p OMPBuilder
  /// (if non-null); the callback routes cancellation through clang's cleanup
  /// machinery via EmitBranchThroughCleanup.
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  /// Pops the finalization callback pushed by the constructor, if any.
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  // Null when no OpenMPIRBuilder is in use; then both ctor and dtor are no-ops.
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1241 
/// Outlines the captured statement \p CS of a 'parallel' or 'teams' directive
/// \p D into a function named via \p OutlinedHelperName, wiring up cancel
/// handling for the directive kinds that support cancellation.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Determine whether this directive's parallel region may be cancelled;
  // each parallel-containing directive kind carries its own hasCancel().
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}
1278 
/// Outlines the 'parallel' region of \p D; the captured statement of the
/// OMPD_parallel region provides the body.
llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}
1286 
/// Outlines the 'teams' region of \p D; the captured statement of the
/// OMPD_teams region provides the body.
llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}
1294 
/// Outlines the task (or taskloop) region of \p D. For untied tasks an action
/// is attached that re-enqueues the task via __kmpc_omp_task at each part
/// boundary, and \p NumberOfParts is set to the number of generated parts.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Codegen executed between parts of an untied task: call
  // __kmpc_omp_task(loc, tid, task_t) to reschedule the task.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Determine whether the task region may be cancelled; each task-containing
  // directive kind carries its own hasCancel().
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1341 
1342 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1343                              const RecordDecl *RD, const CGRecordLayout &RL,
1344                              ArrayRef<llvm::Constant *> Data) {
1345   llvm::StructType *StructTy = RL.getLLVMType();
1346   unsigned PrevIdx = 0;
1347   ConstantInitBuilder CIBuilder(CGM);
1348   const auto *DI = Data.begin();
1349   for (const FieldDecl *FD : RD->fields()) {
1350     unsigned Idx = RL.getLLVMFieldNo(FD);
1351     // Fill the alignment.
1352     for (unsigned I = PrevIdx; I < Idx; ++I)
1353       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1354     PrevIdx = Idx + 1;
1355     Fields.add(*DI);
1356     ++DI;
1357   }
1358 }
1359 
/// Creates a global variable of record type \p Ty initialized with the field
/// constants \p Data (padding zero-filled), forwarding any extra arguments
/// \p Args to ConstantStructBuilder::finishAndCreateGlobal.
template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}
1374 
/// Builds a constant struct of record type \p Ty from the field constants
/// \p Data (padding zero-filled) and appends it to the aggregate builder
/// \p Parent.
template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}
1386 
/// Establishes the service insertion point for the current function: a dummy
/// no-op bitcast instruction used as a stable anchor for emitting location /
/// thread-id setup code. It is placed either at the current insert point or
/// right after the alloca insertion point.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}
1402 
1403 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1404   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1405   if (Elem.second.ServiceInsertPt) {
1406     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1407     Elem.second.ServiceInsertPt = nullptr;
1408     Ptr->eraseFromParent();
1409   }
1410 }
1411 
/// Formats \p Loc into the ident string used by the OpenMP runtime:
/// ";file;function;line;column;;" — written into \p Buffer, a view of which
/// is returned.
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
1424 
1425 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1426                                                  SourceLocation Loc,
1427                                                  unsigned Flags) {
1428   uint32_t SrcLocStrSize;
1429   llvm::Constant *SrcLocStr;
1430   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1431       Loc.isInvalid()) {
1432     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1433   } else {
1434     std::string FunctionName;
1435     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1436       FunctionName = FD->getQualifiedNameAsString();
1437     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1438     const char *FileName = PLoc.getFilename();
1439     unsigned Line = PLoc.getLine();
1440     unsigned Column = PLoc.getColumn();
1441     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1442                                                 Column, SrcLocStrSize);
1443   }
1444   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1445   return OMPBuilder.getOrCreateIdent(
1446       SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1447 }
1448 
/// Return the OpenMP thread id (kmp_int32) for the current function.
///
/// The value comes from one of three places, tried in order:
///  1. the per-function cache in OpenMPLocThreadIDMap;
///  2. the thread-id parameter of an outlined OpenMP region, when it is
///     safe to read it (see the EH-related checks below);
///  3. a freshly emitted __kmpc_global_thread_num call placed at the
///     function's service insertion point and cached for later uses.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only read the parameter when EH cannot observe it mid-construction:
      // either no landing pad is required / C++ exceptions are off, or the
      // load happens in the entry block or in the block that holds the
      // thread-id variable itself.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the runtime call at the service insertion point, restoring the
  // builder's previous position afterwards.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1517 
1518 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1519   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1520   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1521     clearLocThreadIdInsertPt(CGF);
1522     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1523   }
1524   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1525     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1526       UDRMap.erase(D);
1527     FunctionUDRMap.erase(CGF.CurFn);
1528   }
1529   auto I = FunctionUDMMap.find(CGF.CurFn);
1530   if (I != FunctionUDMMap.end()) {
1531     for(const auto *D : I->second)
1532       UDMMap.erase(D);
1533     FunctionUDMMap.erase(I);
1534   }
1535   LastprivateConditionalToTypes.erase(CGF.CurFn);
1536   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1537 }
1538 
/// Return the pointer-to-ident_t type cached by the OpenMPIRBuilder.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}
1542 
1543 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1544   if (!Kmpc_MicroTy) {
1545     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1546     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1547                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1548     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1549   }
1550   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1551 }
1552 
1553 llvm::FunctionCallee
1554 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1555                                              bool IsGPUDistribute) {
1556   assert((IVSize == 32 || IVSize == 64) &&
1557          "IV size is not compatible with the omp runtime");
1558   StringRef Name;
1559   if (IsGPUDistribute)
1560     Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1561                                     : "__kmpc_distribute_static_init_4u")
1562                         : (IVSigned ? "__kmpc_distribute_static_init_8"
1563                                     : "__kmpc_distribute_static_init_8u");
1564   else
1565     Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1566                                     : "__kmpc_for_static_init_4u")
1567                         : (IVSigned ? "__kmpc_for_static_init_8"
1568                                     : "__kmpc_for_static_init_8u");
1569 
1570   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1571   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1572   llvm::Type *TypeParams[] = {
1573     getIdentTyPointerTy(),                     // loc
1574     CGM.Int32Ty,                               // tid
1575     CGM.Int32Ty,                               // schedtype
1576     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1577     PtrTy,                                     // p_lower
1578     PtrTy,                                     // p_upper
1579     PtrTy,                                     // p_stride
1580     ITy,                                       // incr
1581     ITy                                        // chunk
1582   };
1583   auto *FnTy =
1584       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1585   return CGM.CreateRuntimeFunction(FnTy, Name);
1586 }
1587 
1588 llvm::FunctionCallee
1589 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1590   assert((IVSize == 32 || IVSize == 64) &&
1591          "IV size is not compatible with the omp runtime");
1592   StringRef Name =
1593       IVSize == 32
1594           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1595           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1596   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1597   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1598                                CGM.Int32Ty,           // tid
1599                                CGM.Int32Ty,           // schedtype
1600                                ITy,                   // lower
1601                                ITy,                   // upper
1602                                ITy,                   // stride
1603                                ITy                    // chunk
1604   };
1605   auto *FnTy =
1606       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1607   return CGM.CreateRuntimeFunction(FnTy, Name);
1608 }
1609 
1610 llvm::FunctionCallee
1611 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1612   assert((IVSize == 32 || IVSize == 64) &&
1613          "IV size is not compatible with the omp runtime");
1614   StringRef Name =
1615       IVSize == 32
1616           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1617           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1618   llvm::Type *TypeParams[] = {
1619       getIdentTyPointerTy(), // loc
1620       CGM.Int32Ty,           // tid
1621   };
1622   auto *FnTy =
1623       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1624   return CGM.CreateRuntimeFunction(FnTy, Name);
1625 }
1626 
1627 llvm::FunctionCallee
1628 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1629   assert((IVSize == 32 || IVSize == 64) &&
1630          "IV size is not compatible with the omp runtime");
1631   StringRef Name =
1632       IVSize == 32
1633           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1634           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1635   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1636   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1637   llvm::Type *TypeParams[] = {
1638     getIdentTyPointerTy(),                     // loc
1639     CGM.Int32Ty,                               // tid
1640     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1641     PtrTy,                                     // p_lower
1642     PtrTy,                                     // p_upper
1643     PtrTy                                      // p_stride
1644   };
1645   auto *FnTy =
1646       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1647   return CGM.CreateRuntimeFunction(FnTy, Name);
1648 }
1649 
1650 /// Obtain information that uniquely identifies a target entry. This
1651 /// consists of the file and device IDs as well as line number associated with
1652 /// the relevant entry source location.
1653 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1654                                      unsigned &DeviceID, unsigned &FileID,
1655                                      unsigned &LineNum) {
1656   SourceManager &SM = C.getSourceManager();
1657 
1658   // The loc should be always valid and have a file ID (the user cannot use
1659   // #pragma directives in macros)
1660 
1661   assert(Loc.isValid() && "Source location is expected to be always valid.");
1662 
1663   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1664   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1665 
1666   llvm::sys::fs::UniqueID ID;
1667   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1668     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1669     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1670     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1671       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1672           << PLoc.getFilename() << EC.message();
1673   }
1674 
1675   DeviceID = ID.getDevice();
1676   FileID = ID.getFile();
1677   LineNum = PLoc.getLine();
1678 }
1679 
/// Return the address of the reference pointer synthesized for a
/// declare-target 'link' variable (or a 'to' variable under unified shared
/// memory); returns an invalid Address otherwise or in simd-only mode.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    // Build the "<mangled>[_<fileid>]_decl_tgt_ref_ptr" name; the file id is
    // mixed in for internal-linkage variables to keep the name unique across
    // translation units.
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      // First use in this module: create the pointer global lazily.
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host the pointer is initialized with the variable's own
      // address; on the device the runtime fills it in.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address::deprecated(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1718 
/// Return the per-variable cache global ("<mangled>.cache.") used by
/// __kmpc_threadprivate_cached for \p VD, creating it lazily. Only valid
/// when native TLS is not being used.
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
}
1728 
1729 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1730                                                 const VarDecl *VD,
1731                                                 Address VDAddr,
1732                                                 SourceLocation Loc) {
1733   if (CGM.getLangOpts().OpenMPUseTLS &&
1734       CGM.getContext().getTargetInfo().isTLSSupported())
1735     return VDAddr;
1736 
1737   llvm::Type *VarTy = VDAddr.getElementType();
1738   llvm::Value *Args[] = {
1739       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1740       CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
1741       CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1742       getOrCreateThreadPrivateCache(VD)};
1743   return Address::deprecated(
1744       CGF.EmitRuntimeCall(
1745           OMPBuilder.getOrCreateRuntimeFunction(
1746               CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1747           Args),
1748       VDAddr.getAlignment());
1749 }
1750 
1751 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1752     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1753     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1754   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1755   // library.
1756   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1757   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1758                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1759                       OMPLoc);
1760   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1761   // to register constructor/destructor for variable.
1762   llvm::Value *Args[] = {
1763       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1764       Ctor, CopyCtor, Dtor};
1765   CGF.EmitRuntimeCall(
1766       OMPBuilder.getOrCreateRuntimeFunction(
1767           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1768       Args);
1769 }
1770 
/// Emit the ctor/dtor helper functions for a threadprivate variable
/// definition and register them with the runtime.
///
/// Returns the synthesized module-level init function when \p CGF is null
/// (so the caller can schedule it as a global initializer), or nullptr when
/// registration was emitted inline into \p CGF, no helpers were needed, or
/// native TLS is used instead of the runtime.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Emit helpers only once per mangled name.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // The ctor takes the destination address as an opaque void* parameter.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address::deprecated(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The runtime expects the ctor to return the destination address.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // The dtor also takes the object's address as a void* parameter.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address::deprecated(ArgVal, VDAddr.getAlignment()),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      // Pass a null ctor of the expected signature when none was emitted.
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      // Likewise for the dtor.
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function: wrap the registration in a synthesized
      // "__omp_threadprivate_init_" function and return it to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1890 
/// Emit and register the offload ctor/dtor entries for a declare-target
/// variable definition. Returns true when the variable's own initializer
/// must be suppressed by the caller (i.e. when compiling for the device).
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // 'link' variables (and 'to' under unified shared memory) are handled via
  // the reference pointer, not ctor/dtor entries.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Only emit the entries once per mangled name.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(
          Init, Address::deprecated(Addr, CGM.getContext().getDeclAlign(VD)),
          Init->getType().getQualifiers(),
          /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive; it is only referenced from the offload tables.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host only a unique placeholder ID is needed; the real ctor
      // runs on the device.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(
          Address::deprecated(Addr, CGM.getContext().getDeclAlign(VD)), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the dtor alive; it is only referenced from the offload tables.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side: placeholder ID only, matching the ctor handling above.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2006 
/// Return a per-thread address for a compiler-generated ("artificial")
/// threadprivate value named \p Name of type \p VarType. Uses a native TLS
/// global when available, otherwise __kmpc_threadprivate_cached with a
/// dedicated cache global.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::GlobalVariable *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    // Native TLS: mark the global thread-local and return it directly.
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Runtime path: __kmpc_threadprivate_cached(loc, tid, &var, size, &cache).
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  // Cast the runtime's void* result back to a pointer to the value type.
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
}
2038 
2039 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2040                                    const RegionCodeGenTy &ThenGen,
2041                                    const RegionCodeGenTy &ElseGen) {
2042   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2043 
2044   // If the condition constant folds and can be elided, try to avoid emitting
2045   // the condition and the dead arm of the if/else.
2046   bool CondConstant;
2047   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2048     if (CondConstant)
2049       ThenGen(CGF);
2050     else
2051       ElseGen(CGF);
2052     return;
2053   }
2054 
2055   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2056   // emit the conditional branch.
2057   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2058   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2059   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2060   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2061 
2062   // Emit the 'then' code.
2063   CGF.EmitBlock(ThenBlock);
2064   ThenGen(CGF);
2065   CGF.EmitBranch(ContBlock);
2066   // Emit the 'else' code if present.
2067   // There is no need to emit line number for unconditional branch.
2068   (void)ApplyDebugLocation::CreateEmpty(CGF);
2069   CGF.EmitBlock(ElseBlock);
2070   ElseGen(CGF);
2071   // There is no need to emit line number for unconditional branch.
2072   (void)ApplyDebugLocation::CreateEmpty(CGF);
2073   CGF.EmitBranch(ContBlock);
2074   // Emit the continuation block for code after the if.
2075   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2076 }
2077 
/// Emit the runtime calls for an OpenMP 'parallel' construct.
///
/// With no if-clause (\p IfCond is null) or a true condition, the region is
/// started via __kmpc_fork_call(loc, n, microtask, vars...).  With a false
/// condition the region is serialized: __kmpc_serialized_parallel, a direct
/// call of \p OutlinedFn on the current thread, then
/// __kmpc_end_serialized_parallel.
/// NOTE(review): \p NumThreads is unused in this visible body — presumably
/// consumed elsewhere (e.g. a preceding __kmpc_push_num_threads); confirm.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // Codegen for the parallel (non-serialized) path.
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Codegen for the serialized path (if-clause evaluated to false).
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    //       handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  // With an if-clause emit both arms under a runtime branch; otherwise the
  // parallel path is emitted unconditionally.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2149 
2150 // If we're inside an (outlined) parallel region, use the region info's
2151 // thread-ID variable (it is passed in a first argument of the outlined function
2152 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2153 // regular serial code region, get thread ID by calling kmp_int32
2154 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2155 // return the address of that temp.
2156 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2157                                              SourceLocation Loc) {
2158   if (auto *OMPRegionInfo =
2159           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2160     if (OMPRegionInfo->getThreadIDVariable())
2161       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2162 
2163   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2164   QualType Int32Ty =
2165       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2166   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2167   CGF.EmitStoreOfScalar(ThreadID,
2168                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2169 
2170   return ThreadIDTemp;
2171 }
2172 
2173 llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable(
2174     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2175   SmallString<256> Buffer;
2176   llvm::raw_svector_ostream Out(Buffer);
2177   Out << Name;
2178   StringRef RuntimeName = Out.str();
2179   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2180   if (Elem.second) {
2181     assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) &&
2182            "OMP internal variable has different type than requested");
2183     return &*Elem.second;
2184   }
2185 
2186   return Elem.second = new llvm::GlobalVariable(
2187              CGM.getModule(), Ty, /*IsConstant*/ false,
2188              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2189              Elem.first(), /*InsertBefore=*/nullptr,
2190              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2191 }
2192 
2193 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2194   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2195   std::string Name = getName({Prefix, "var"});
2196   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2197 }
2198 
2199 namespace {
2200 /// Common pre(post)-action for different OpenMP constructs.
2201 class CommonActionTy final : public PrePostActionTy {
2202   llvm::FunctionCallee EnterCallee;
2203   ArrayRef<llvm::Value *> EnterArgs;
2204   llvm::FunctionCallee ExitCallee;
2205   ArrayRef<llvm::Value *> ExitArgs;
2206   bool Conditional;
2207   llvm::BasicBlock *ContBlock = nullptr;
2208 
2209 public:
2210   CommonActionTy(llvm::FunctionCallee EnterCallee,
2211                  ArrayRef<llvm::Value *> EnterArgs,
2212                  llvm::FunctionCallee ExitCallee,
2213                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2214       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2215         ExitArgs(ExitArgs), Conditional(Conditional) {}
2216   void Enter(CodeGenFunction &CGF) override {
2217     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2218     if (Conditional) {
2219       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2220       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2221       ContBlock = CGF.createBasicBlock("omp_if.end");
2222       // Generate the branch (If-stmt)
2223       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2224       CGF.EmitBlock(ThenBlock);
2225     }
2226   }
2227   void Done(CodeGenFunction &CGF) {
2228     // Emit the rest of blocks/branches
2229     CGF.EmitBranch(ContBlock);
2230     CGF.EmitBlock(ContBlock, true);
2231   }
2232   void Exit(CodeGenFunction &CGF) override {
2233     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2234   }
2235 };
2236 } // anonymous namespace
2237 
2238 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2239                                          StringRef CriticalName,
2240                                          const RegionCodeGenTy &CriticalOpGen,
2241                                          SourceLocation Loc, const Expr *Hint) {
2242   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2243   // CriticalOpGen();
2244   // __kmpc_end_critical(ident_t *, gtid, Lock);
2245   // Prepare arguments and build a call to __kmpc_critical
2246   if (!CGF.HaveInsertPoint())
2247     return;
2248   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2249                          getCriticalRegionLock(CriticalName)};
2250   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2251                                                 std::end(Args));
2252   if (Hint) {
2253     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2254         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2255   }
2256   CommonActionTy Action(
2257       OMPBuilder.getOrCreateRuntimeFunction(
2258           CGM.getModule(),
2259           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2260       EnterArgs,
2261       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2262                                             OMPRTL___kmpc_end_critical),
2263       Args);
2264   CriticalOpGen.setAction(Action);
2265   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2266 }
2267 
2268 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2269                                        const RegionCodeGenTy &MasterOpGen,
2270                                        SourceLocation Loc) {
2271   if (!CGF.HaveInsertPoint())
2272     return;
2273   // if(__kmpc_master(ident_t *, gtid)) {
2274   //   MasterOpGen();
2275   //   __kmpc_end_master(ident_t *, gtid);
2276   // }
2277   // Prepare arguments and build a call to __kmpc_master
2278   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2279   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2280                             CGM.getModule(), OMPRTL___kmpc_master),
2281                         Args,
2282                         OMPBuilder.getOrCreateRuntimeFunction(
2283                             CGM.getModule(), OMPRTL___kmpc_end_master),
2284                         Args,
2285                         /*Conditional=*/true);
2286   MasterOpGen.setAction(Action);
2287   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2288   Action.Done(CGF);
2289 }
2290 
2291 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2292                                        const RegionCodeGenTy &MaskedOpGen,
2293                                        SourceLocation Loc, const Expr *Filter) {
2294   if (!CGF.HaveInsertPoint())
2295     return;
2296   // if(__kmpc_masked(ident_t *, gtid, filter)) {
2297   //   MaskedOpGen();
2298   //   __kmpc_end_masked(iden_t *, gtid);
2299   // }
2300   // Prepare arguments and build a call to __kmpc_masked
2301   llvm::Value *FilterVal = Filter
2302                                ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2303                                : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2304   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2305                          FilterVal};
2306   llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2307                             getThreadID(CGF, Loc)};
2308   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2309                             CGM.getModule(), OMPRTL___kmpc_masked),
2310                         Args,
2311                         OMPBuilder.getOrCreateRuntimeFunction(
2312                             CGM.getModule(), OMPRTL___kmpc_end_masked),
2313                         ArgsEnd,
2314                         /*Conditional=*/true);
2315   MaskedOpGen.setAction(Action);
2316   emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2317   Action.Done(CGF);
2318 }
2319 
2320 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2321                                         SourceLocation Loc) {
2322   if (!CGF.HaveInsertPoint())
2323     return;
2324   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2325     OMPBuilder.createTaskyield(CGF.Builder);
2326   } else {
2327     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2328     llvm::Value *Args[] = {
2329         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2330         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2331     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2332                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2333                         Args);
2334   }
2335 
2336   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2337     Region->emitUntiedSwitch(CGF);
2338 }
2339 
2340 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2341                                           const RegionCodeGenTy &TaskgroupOpGen,
2342                                           SourceLocation Loc) {
2343   if (!CGF.HaveInsertPoint())
2344     return;
2345   // __kmpc_taskgroup(ident_t *, gtid);
2346   // TaskgroupOpGen();
2347   // __kmpc_end_taskgroup(ident_t *, gtid);
2348   // Prepare arguments and build a call to __kmpc_taskgroup
2349   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2350   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2351                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2352                         Args,
2353                         OMPBuilder.getOrCreateRuntimeFunction(
2354                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2355                         Args);
2356   TaskgroupOpGen.setAction(Action);
2357   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2358 }
2359 
2360 /// Given an array of pointers to variables, project the address of a
2361 /// given variable.
2362 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2363                                       unsigned Index, const VarDecl *Var) {
2364   // Pull out the pointer to the variable.
2365   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2366   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2367 
2368   Address Addr = Address::deprecated(Ptr, CGF.getContext().getDeclAlign(Var));
2369   Addr = CGF.Builder.CreateElementBitCast(
2370       Addr, CGF.ConvertTypeForMem(Var->getType()));
2371   return Addr;
2372 }
2373 
/// Emit the helper passed to __kmpc_copyprivate that broadcasts the values of
/// the copyprivate variables from the executing thread to all others:
///   void copy_func(void *LHSArg, void *RHSArg);
/// Both arguments are arrays of void* (one slot per copyprivate variable,
/// as built by emitSingleRegion); each destination is assigned from the
/// matching source via the corresponding expression in \p AssignmentOps.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  // Internal-linkage function with a runtime-mangled name.
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  // Use a fresh CodeGenFunction to emit the helper's body.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS = Address::deprecated(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), ArgsType),
      CGF.getPointerAlign());
  Address RHS = Address::deprecated(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), ArgsType),
      CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // The copy itself goes through EmitOMPCopy so user-defined assignment
    // operators in AssignmentOps are honored.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2429 
/// Emit an OpenMP 'single' region, optionally followed by the copyprivate
/// broadcast that publishes the single thread's values to all other threads.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // The four arrays are parallel: one src/dst/assignment per copyprivate var.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    // did_it records whether this thread executed the single region; the
    // runtime uses it to pick the broadcast source.
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    // Still inside the conditional region, so only the executing thread
    // sets the flag; Action.Done closes the guard below.
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2516 
2517 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2518                                         const RegionCodeGenTy &OrderedOpGen,
2519                                         SourceLocation Loc, bool IsThreads) {
2520   if (!CGF.HaveInsertPoint())
2521     return;
2522   // __kmpc_ordered(ident_t *, gtid);
2523   // OrderedOpGen();
2524   // __kmpc_end_ordered(ident_t *, gtid);
2525   // Prepare arguments and build a call to __kmpc_ordered
2526   if (IsThreads) {
2527     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2528     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2529                               CGM.getModule(), OMPRTL___kmpc_ordered),
2530                           Args,
2531                           OMPBuilder.getOrCreateRuntimeFunction(
2532                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2533                           Args);
2534     OrderedOpGen.setAction(Action);
2535     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2536     return;
2537   }
2538   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2539 }
2540 
2541 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2542   unsigned Flags;
2543   if (Kind == OMPD_for)
2544     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2545   else if (Kind == OMPD_sections)
2546     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2547   else if (Kind == OMPD_single)
2548     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2549   else if (Kind == OMPD_barrier)
2550     Flags = OMP_IDENT_BARRIER_EXPL;
2551   else
2552     Flags = OMP_IDENT_BARRIER_IMPL;
2553   return Flags;
2554 }
2555 
2556 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2557     CodeGenFunction &CGF, const OMPLoopDirective &S,
2558     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2559   // Check if the loop directive is actually a doacross loop directive. In this
2560   // case choose static, 1 schedule.
2561   if (llvm::any_of(
2562           S.getClausesOfKind<OMPOrderedClause>(),
2563           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2564     ScheduleKind = OMPC_SCHEDULE_static;
2565     // Chunk size is 1 in this case.
2566     llvm::APInt ChunkSize(32, 1);
2567     ChunkExpr = IntegerLiteral::Create(
2568         CGF.getContext(), ChunkSize,
2569         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2570         SourceLocation());
2571   }
2572 }
2573 
/// Emit a barrier at the current insert point.  Depending on context this is
/// either an OMPBuilder-generated barrier, a cancellable barrier
/// (__kmpc_cancel_barrier, with an optional cancellation check), or a plain
/// __kmpc_barrier.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  // The ident_t flags encode which construct implied this barrier.
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    // Inside a cancellable region the barrier must observe cancellation.
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2623 
2624 /// Map the OpenMP loop schedule to the runtime enumeration.
2625 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2626                                           bool Chunked, bool Ordered) {
2627   switch (ScheduleKind) {
2628   case OMPC_SCHEDULE_static:
2629     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2630                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2631   case OMPC_SCHEDULE_dynamic:
2632     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2633   case OMPC_SCHEDULE_guided:
2634     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2635   case OMPC_SCHEDULE_runtime:
2636     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2637   case OMPC_SCHEDULE_auto:
2638     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2639   case OMPC_SCHEDULE_unknown:
2640     assert(!Chunked && "chunk was specified but schedule kind not known");
2641     return Ordered ? OMP_ord_static : OMP_sch_static;
2642   }
2643   llvm_unreachable("Unexpected runtime schedule");
2644 }
2645 
2646 /// Map the OpenMP distribute schedule to the runtime enumeration.
2647 static OpenMPSchedType
2648 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2649   // only static is allowed for dist_schedule
2650   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2651 }
2652 
2653 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2654                                          bool Chunked) const {
2655   OpenMPSchedType Schedule =
2656       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2657   return Schedule == OMP_sch_static;
2658 }
2659 
2660 bool CGOpenMPRuntime::isStaticNonchunked(
2661     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2662   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2663   return Schedule == OMP_dist_sch_static;
2664 }
2665 
2666 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2667                                       bool Chunked) const {
2668   OpenMPSchedType Schedule =
2669       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2670   return Schedule == OMP_sch_static_chunked;
2671 }
2672 
2673 bool CGOpenMPRuntime::isStaticChunked(
2674     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2675   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2676   return Schedule == OMP_dist_sch_static_chunked;
2677 }
2678 
2679 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2680   OpenMPSchedType Schedule =
2681       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2682   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2683   return Schedule != OMP_sch_static;
2684 }
2685 
2686 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2687                                   OpenMPScheduleClauseModifier M1,
2688                                   OpenMPScheduleClauseModifier M2) {
2689   int Modifier = 0;
2690   switch (M1) {
2691   case OMPC_SCHEDULE_MODIFIER_monotonic:
2692     Modifier = OMP_sch_modifier_monotonic;
2693     break;
2694   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2695     Modifier = OMP_sch_modifier_nonmonotonic;
2696     break;
2697   case OMPC_SCHEDULE_MODIFIER_simd:
2698     if (Schedule == OMP_sch_static_chunked)
2699       Schedule = OMP_sch_static_balanced_chunked;
2700     break;
2701   case OMPC_SCHEDULE_MODIFIER_last:
2702   case OMPC_SCHEDULE_MODIFIER_unknown:
2703     break;
2704   }
2705   switch (M2) {
2706   case OMPC_SCHEDULE_MODIFIER_monotonic:
2707     Modifier = OMP_sch_modifier_monotonic;
2708     break;
2709   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2710     Modifier = OMP_sch_modifier_nonmonotonic;
2711     break;
2712   case OMPC_SCHEDULE_MODIFIER_simd:
2713     if (Schedule == OMP_sch_static_chunked)
2714       Schedule = OMP_sch_static_balanced_chunked;
2715     break;
2716   case OMPC_SCHEDULE_MODIFIER_last:
2717   case OMPC_SCHEDULE_MODIFIER_unknown:
2718     break;
2719   }
2720   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2721   // If the static schedule kind is specified or if the ordered clause is
2722   // specified, and if the nonmonotonic modifier is not specified, the effect is
2723   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2724   // modifier is specified, the effect is as if the nonmonotonic modifier is
2725   // specified.
2726   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2727     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2728           Schedule == OMP_sch_static_balanced_chunked ||
2729           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2730           Schedule == OMP_dist_sch_static_chunked ||
2731           Schedule == OMP_dist_sch_static))
2732       Modifier = OMP_sch_modifier_nonmonotonic;
2733   }
2734   return Schedule | Modifier;
2735 }
2736 
2737 void CGOpenMPRuntime::emitForDispatchInit(
2738     CodeGenFunction &CGF, SourceLocation Loc,
2739     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2740     bool Ordered, const DispatchRTInput &DispatchValues) {
2741   if (!CGF.HaveInsertPoint())
2742     return;
2743   OpenMPSchedType Schedule = getRuntimeSchedule(
2744       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2745   assert(Ordered ||
2746          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2747           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2748           Schedule != OMP_sch_static_balanced_chunked));
2749   // Call __kmpc_dispatch_init(
2750   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2751   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2752   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2753 
2754   // If the Chunk was not specified in the clause - use default value 1.
2755   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2756                                             : CGF.Builder.getIntN(IVSize, 1);
2757   llvm::Value *Args[] = {
2758       emitUpdateLocation(CGF, Loc),
2759       getThreadID(CGF, Loc),
2760       CGF.Builder.getInt32(addMonoNonMonoModifier(
2761           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2762       DispatchValues.LB,                                     // Lower
2763       DispatchValues.UB,                                     // Upper
2764       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2765       Chunk                                                  // Chunk
2766   };
2767   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2768 }
2769 
2770 static void emitForStaticInitCall(
2771     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2772     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2773     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2774     const CGOpenMPRuntime::StaticRTInput &Values) {
2775   if (!CGF.HaveInsertPoint())
2776     return;
2777 
2778   assert(!Values.Ordered);
2779   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2780          Schedule == OMP_sch_static_balanced_chunked ||
2781          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2782          Schedule == OMP_dist_sch_static ||
2783          Schedule == OMP_dist_sch_static_chunked);
2784 
2785   // Call __kmpc_for_static_init(
2786   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2787   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2788   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2789   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2790   llvm::Value *Chunk = Values.Chunk;
2791   if (Chunk == nullptr) {
2792     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2793             Schedule == OMP_dist_sch_static) &&
2794            "expected static non-chunked schedule");
2795     // If the Chunk was not specified in the clause - use default value 1.
2796     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2797   } else {
2798     assert((Schedule == OMP_sch_static_chunked ||
2799             Schedule == OMP_sch_static_balanced_chunked ||
2800             Schedule == OMP_ord_static_chunked ||
2801             Schedule == OMP_dist_sch_static_chunked) &&
2802            "expected static chunked schedule");
2803   }
2804   llvm::Value *Args[] = {
2805       UpdateLocation,
2806       ThreadId,
2807       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2808                                                   M2)), // Schedule type
2809       Values.IL.getPointer(),                           // &isLastIter
2810       Values.LB.getPointer(),                           // &LB
2811       Values.UB.getPointer(),                           // &UB
2812       Values.ST.getPointer(),                           // &Stride
2813       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2814       Chunk                                             // Chunk
2815   };
2816   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2817 }
2818 
2819 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2820                                         SourceLocation Loc,
2821                                         OpenMPDirectiveKind DKind,
2822                                         const OpenMPScheduleTy &ScheduleKind,
2823                                         const StaticRTInput &Values) {
2824   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2825       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2826   assert(isOpenMPWorksharingDirective(DKind) &&
2827          "Expected loop-based or sections-based directive.");
2828   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2829                                              isOpenMPLoopDirective(DKind)
2830                                                  ? OMP_IDENT_WORK_LOOP
2831                                                  : OMP_IDENT_WORK_SECTIONS);
2832   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2833   llvm::FunctionCallee StaticInitFunction =
2834       createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
2835   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2836   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2837                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2838 }
2839 
2840 void CGOpenMPRuntime::emitDistributeStaticInit(
2841     CodeGenFunction &CGF, SourceLocation Loc,
2842     OpenMPDistScheduleClauseKind SchedKind,
2843     const CGOpenMPRuntime::StaticRTInput &Values) {
2844   OpenMPSchedType ScheduleNum =
2845       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2846   llvm::Value *UpdatedLocation =
2847       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2848   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2849   llvm::FunctionCallee StaticInitFunction;
2850   bool isGPUDistribute =
2851       CGM.getLangOpts().OpenMPIsDevice &&
2852       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2853   StaticInitFunction = createForStaticInitFunction(
2854       Values.IVSize, Values.IVSigned, isGPUDistribute);
2855 
2856   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2857                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2858                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2859 }
2860 
2861 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2862                                           SourceLocation Loc,
2863                                           OpenMPDirectiveKind DKind) {
2864   if (!CGF.HaveInsertPoint())
2865     return;
2866   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2867   llvm::Value *Args[] = {
2868       emitUpdateLocation(CGF, Loc,
2869                          isOpenMPDistributeDirective(DKind)
2870                              ? OMP_IDENT_WORK_DISTRIBUTE
2871                              : isOpenMPLoopDirective(DKind)
2872                                    ? OMP_IDENT_WORK_LOOP
2873                                    : OMP_IDENT_WORK_SECTIONS),
2874       getThreadID(CGF, Loc)};
2875   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2876   if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2877       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2878     CGF.EmitRuntimeCall(
2879         OMPBuilder.getOrCreateRuntimeFunction(
2880             CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2881         Args);
2882   else
2883     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2884                             CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2885                         Args);
2886 }
2887 
2888 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2889                                                  SourceLocation Loc,
2890                                                  unsigned IVSize,
2891                                                  bool IVSigned) {
2892   if (!CGF.HaveInsertPoint())
2893     return;
2894   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2895   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2896   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2897 }
2898 
2899 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2900                                           SourceLocation Loc, unsigned IVSize,
2901                                           bool IVSigned, Address IL,
2902                                           Address LB, Address UB,
2903                                           Address ST) {
2904   // Call __kmpc_dispatch_next(
2905   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2906   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2907   //          kmp_int[32|64] *p_stride);
2908   llvm::Value *Args[] = {
2909       emitUpdateLocation(CGF, Loc),
2910       getThreadID(CGF, Loc),
2911       IL.getPointer(), // &isLastIter
2912       LB.getPointer(), // &Lower
2913       UB.getPointer(), // &Upper
2914       ST.getPointer()  // &Stride
2915   };
2916   llvm::Value *Call =
2917       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2918   return CGF.EmitScalarConversion(
2919       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2920       CGF.getContext().BoolTy, Loc);
2921 }
2922 
2923 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2924                                            llvm::Value *NumThreads,
2925                                            SourceLocation Loc) {
2926   if (!CGF.HaveInsertPoint())
2927     return;
2928   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2929   llvm::Value *Args[] = {
2930       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2931       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2932   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2933                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2934                       Args);
2935 }
2936 
2937 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2938                                          ProcBindKind ProcBind,
2939                                          SourceLocation Loc) {
2940   if (!CGF.HaveInsertPoint())
2941     return;
2942   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2943   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2944   llvm::Value *Args[] = {
2945       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2946       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2947   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2948                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2949                       Args);
2950 }
2951 
2952 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2953                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2954   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2955     OMPBuilder.createFlush(CGF.Builder);
2956   } else {
2957     if (!CGF.HaveInsertPoint())
2958       return;
2959     // Build call void __kmpc_flush(ident_t *loc)
2960     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2961                             CGM.getModule(), OMPRTL___kmpc_flush),
2962                         emitUpdateLocation(CGF, Loc));
2963   }
2964 }
2965 
namespace {
/// Indexes of fields for type kmp_task_t.
/// NOTE(review): the field order presumably mirrors the kmp_task_t layout
/// expected by the OpenMP runtime — verify against the runtime's kmp.h
/// before reordering.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2991 
2992 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2993   return OffloadEntriesTargetRegion.empty() &&
2994          OffloadEntriesDeviceGlobalVar.empty();
2995 }
2996 
2997 /// Initialize target region entry.
2998 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2999     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3000                                     StringRef ParentName, unsigned LineNum,
3001                                     unsigned Order) {
3002   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3003                                              "only required for the device "
3004                                              "code generation.");
3005   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3006       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3007                                    OMPTargetRegionEntryTargetRegion);
3008   ++OffloadingEntriesNum;
3009 }
3010 
3011 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3012     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3013                                   StringRef ParentName, unsigned LineNum,
3014                                   llvm::Constant *Addr, llvm::Constant *ID,
3015                                   OMPTargetRegionEntryKind Flags) {
3016   // If we are emitting code for a target, the entry is already initialized,
3017   // only has to be registered.
3018   if (CGM.getLangOpts().OpenMPIsDevice) {
3019     // This could happen if the device compilation is invoked standalone.
3020     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
3021       return;
3022     auto &Entry =
3023         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3024     Entry.setAddress(Addr);
3025     Entry.setID(ID);
3026     Entry.setFlags(Flags);
3027   } else {
3028     if (Flags ==
3029             OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
3030         hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
3031                                  /*IgnoreAddressId*/ true))
3032       return;
3033     assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
3034            "Target region entry already registered!");
3035     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3036     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3037     ++OffloadingEntriesNum;
3038   }
3039 }
3040 
3041 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3042     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3043     bool IgnoreAddressId) const {
3044   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3045   if (PerDevice == OffloadEntriesTargetRegion.end())
3046     return false;
3047   auto PerFile = PerDevice->second.find(FileID);
3048   if (PerFile == PerDevice->second.end())
3049     return false;
3050   auto PerParentName = PerFile->second.find(ParentName);
3051   if (PerParentName == PerFile->second.end())
3052     return false;
3053   auto PerLine = PerParentName->second.find(LineNum);
3054   if (PerLine == PerParentName->second.end())
3055     return false;
3056   // Fail if this entry is already registered.
3057   if (!IgnoreAddressId &&
3058       (PerLine->second.getAddress() || PerLine->second.getID()))
3059     return false;
3060   return true;
3061 }
3062 
3063 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3064     const OffloadTargetRegionEntryInfoActTy &Action) {
3065   // Scan all target region entries and perform the provided action.
3066   for (const auto &D : OffloadEntriesTargetRegion)
3067     for (const auto &F : D.second)
3068       for (const auto &P : F.second)
3069         for (const auto &L : P.second)
3070           Action(D.first, F.first, P.first(), L.first, L.second);
3071 }
3072 
/// Reserve a slot (at position \p Order) for a device global variable entry;
/// address, size and linkage are filled in later by
/// registerDeviceGlobalVarEntryInfo.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  // try_emplace keeps an existing entry if this name was already initialized.
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}
3083 
3084 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3085     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3086                                      CharUnits VarSize,
3087                                      OMPTargetGlobalVarEntryKind Flags,
3088                                      llvm::GlobalValue::LinkageTypes Linkage) {
3089   if (CGM.getLangOpts().OpenMPIsDevice) {
3090     // This could happen if the device compilation is invoked standalone.
3091     if (!hasDeviceGlobalVarEntryInfo(VarName))
3092       return;
3093     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3094     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3095       if (Entry.getVarSize().isZero()) {
3096         Entry.setVarSize(VarSize);
3097         Entry.setLinkage(Linkage);
3098       }
3099       return;
3100     }
3101     Entry.setVarSize(VarSize);
3102     Entry.setLinkage(Linkage);
3103     Entry.setAddress(Addr);
3104   } else {
3105     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3106       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3107       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3108              "Entry not initialized!");
3109       if (Entry.getVarSize().isZero()) {
3110         Entry.setVarSize(VarSize);
3111         Entry.setLinkage(Linkage);
3112       }
3113       return;
3114     }
3115     OffloadEntriesDeviceGlobalVar.try_emplace(
3116         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3117     ++OffloadingEntriesNum;
3118   }
3119 }
3120 
3121 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3122     actOnDeviceGlobalVarEntriesInfo(
3123         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3124   // Scan all target region entries and perform the provided action.
3125   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3126     Action(E.getKey(), E.getValue());
3127 }
3128 
3129 void CGOpenMPRuntime::createOffloadEntry(
3130     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3131     llvm::GlobalValue::LinkageTypes Linkage) {
3132   StringRef Name = Addr->getName();
3133   llvm::Module &M = CGM.getModule();
3134   llvm::LLVMContext &C = M.getContext();
3135 
3136   // Create constant string with the name.
3137   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3138 
3139   std::string StringName = getName({"omp_offloading", "entry_name"});
3140   auto *Str = new llvm::GlobalVariable(
3141       M, StrPtrInit->getType(), /*isConstant=*/true,
3142       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3143   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3144 
3145   llvm::Constant *Data[] = {
3146       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3147       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3148       llvm::ConstantInt::get(CGM.SizeTy, Size),
3149       llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3150       llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3151   std::string EntryName = getName({"omp_offloading", "entry", ""});
3152   llvm::GlobalVariable *Entry = createGlobalStruct(
3153       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3154       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3155 
3156   // The entry has to be created in the section the linker expects it to be.
3157   Entry->setSection("omp_offloading_entries");
3158 }
3159 
/// Emit the per-entry __tgt_offload_entry descriptors and the
/// !omp_offload.info metadata that the device-side compilation reads back via
/// loadOffloadInfoMetadata().
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries are collected positionally: each entry stores itself at index
  // getOrder(), so the vectors are pre-sized to the total entry count.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry.
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Recover a SourceLocation for diagnostics by matching the entry's
        // device/file unique IDs against the files the SourceManager knows.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry.
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Emit the actual __tgt_offload_entry descriptors, diagnosing entries that
  // were registered but never received a valid address/ID.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // With 'requires unified_shared_memory' the device side skips 'to'
        // entries.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3333 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code
  // has to match the metadata creation in
  // createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  // Nothing to do when no host IR file was provided on the command line.
  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host bitcode into a throwaway context; only the named metadata
  // is consulted below.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to read the integer/string operands of one metadata node.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 encodes the entry kind; the remaining operand layout matches
    // what createOffloadEntriesAndInfoMetadata() emitted.
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
3402 
3403 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3404   if (!KmpRoutineEntryPtrTy) {
3405     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3406     ASTContext &C = CGM.getContext();
3407     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3408     FunctionProtoType::ExtProtoInfo EPI;
3409     KmpRoutineEntryPtrQTy = C.getPointerType(
3410         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3411     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3412   }
3413 }
3414 
3415 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3416   // Make sure the type of the entry is already created. This is the type we
3417   // have to create:
3418   // struct __tgt_offload_entry{
3419   //   void      *addr;       // Pointer to the offload entry info.
3420   //                          // (function or global)
3421   //   char      *name;       // Name of the function or global.
3422   //   size_t     size;       // Size of the entry info (0 if it a function).
3423   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3424   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3425   // };
3426   if (TgtOffloadEntryQTy.isNull()) {
3427     ASTContext &C = CGM.getContext();
3428     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3429     RD->startDefinition();
3430     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3431     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3432     addFieldToRecordDecl(C, RD, C.getSizeType());
3433     addFieldToRecordDecl(
3434         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3435     addFieldToRecordDecl(
3436         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3437     RD->completeDefinition();
3438     RD->addAttr(PackedAttr::CreateImplicit(C));
3439     TgtOffloadEntryQTy = C.getRecordType(RD);
3440   }
3441   return TgtOffloadEntryQTy;
3442 }
3443 
3444 namespace {
3445 struct PrivateHelpersTy {
3446   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3447                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3448       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3449         PrivateElemInit(PrivateElemInit) {}
3450   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3451   const Expr *OriginalRef = nullptr;
3452   const VarDecl *Original = nullptr;
3453   const VarDecl *PrivateCopy = nullptr;
3454   const VarDecl *PrivateElemInit = nullptr;
3455   bool isLocalPrivate() const {
3456     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3457   }
3458 };
3459 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3460 } // anonymous namespace
3461 
3462 static bool isAllocatableDecl(const VarDecl *VD) {
3463   const VarDecl *CVD = VD->getCanonicalDecl();
3464   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3465     return false;
3466   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3467   // Use the default allocation.
3468   return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
3469            !AA->getAllocator());
3470 }
3471 
3472 static RecordDecl *
3473 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3474   if (!Privates.empty()) {
3475     ASTContext &C = CGM.getContext();
3476     // Build struct .kmp_privates_t. {
3477     //         /*  private vars  */
3478     //       };
3479     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3480     RD->startDefinition();
3481     for (const auto &Pair : Privates) {
3482       const VarDecl *VD = Pair.second.Original;
3483       QualType Type = VD->getType().getNonReferenceType();
3484       // If the private variable is a local variable with lvalue ref type,
3485       // allocate the pointer instead of the pointee type.
3486       if (Pair.second.isLocalPrivate()) {
3487         if (VD->getType()->isLValueReferenceType())
3488           Type = C.getPointerType(Type);
3489         if (isAllocatableDecl(VD))
3490           Type = C.getPointerType(Type);
3491       }
3492       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3493       if (VD->hasAttrs()) {
3494         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3495              E(VD->getAttrs().end());
3496              I != E; ++I)
3497           FD->addAttr(*I);
3498       }
3499     }
3500     RD->completeDefinition();
3501     return RD;
3502   }
3503   return nullptr;
3504 }
3505 
3506 static RecordDecl *
3507 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3508                          QualType KmpInt32Ty,
3509                          QualType KmpRoutineEntryPointerQTy) {
3510   ASTContext &C = CGM.getContext();
3511   // Build struct kmp_task_t {
3512   //         void *              shareds;
3513   //         kmp_routine_entry_t routine;
3514   //         kmp_int32           part_id;
3515   //         kmp_cmplrdata_t data1;
3516   //         kmp_cmplrdata_t data2;
3517   // For taskloops additional fields:
3518   //         kmp_uint64          lb;
3519   //         kmp_uint64          ub;
3520   //         kmp_int64           st;
3521   //         kmp_int32           liter;
3522   //         void *              reductions;
3523   //       };
3524   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3525   UD->startDefinition();
3526   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3527   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3528   UD->completeDefinition();
3529   QualType KmpCmplrdataTy = C.getRecordType(UD);
3530   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3531   RD->startDefinition();
3532   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3533   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3534   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3535   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3536   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3537   if (isOpenMPTaskLoopDirective(Kind)) {
3538     QualType KmpUInt64Ty =
3539         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3540     QualType KmpInt64Ty =
3541         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3542     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3543     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3544     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3545     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3546     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3547   }
3548   RD->completeDefinition();
3549   return RD;
3550 }
3551 
3552 static RecordDecl *
3553 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3554                                      ArrayRef<PrivateDataTy> Privates) {
3555   ASTContext &C = CGM.getContext();
3556   // Build struct kmp_task_t_with_privates {
3557   //         kmp_task_t task_data;
3558   //         .kmp_privates_t. privates;
3559   //       };
3560   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3561   RD->startDefinition();
3562   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3563   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3564     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3565   RD->completeDefinition();
3566   return RD;
3567 }
3568 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
/// \param Kind Directive kind; taskloop directives get the extra
/// lb/ub/st/liter/reductions arguments.
/// \param TaskFunction The outlined function that executes the task body.
/// \param TaskPrivatesMap Value passed through as the privates mapping
/// function argument.
/// \return The generated internal-linkage task entry function.
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Proxy signature: kmp_int32 (kmp_int32 gtid, kmp_task_t_with_privates *tt).
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // Dereference the kmp_task_t_with_privates* argument.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // The first field of the wrapper record is the embedded kmp_task_t.
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address, not by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load tt->shareds and cast it to the pointer type the outlined function
  // expects.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // Pass &tt->privates when the privates field exists; a null void* when the
  // task has no privates (the field is omitted from the record then).
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Arguments common to regular tasks and taskloops.
  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
                                               CGF.VoidPtrTy, CGF.Int8Ty)
          .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally receive lb, ub, st, liter and reductions loaded
    // from the kmp_task_t fields.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3683 
/// Emit the task destructor function
///   kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t_with_privates
///   *tt)
/// which runs the destructors of all fields of the privates record embedded
/// in the task object.
/// \return The generated internal-linkage destructor function.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Signature mirrors the task entry: (kmp_int32 gtid, task_t_with_privates*).
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  // Dereference the task argument and step to the privates record, which is
  // the second field of kmp_task_t_with_privates.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Schedule a destroy cleanup for every field of the privates record that
  // has a non-trivial destruction kind.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3732 
/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
/// \param Data Clause data whose Private/Firstprivate/Lastprivate vars and
/// PrivateLocals determine the argument list, in that order.
/// \param PrivatesQTy Type of the '.kmp_privates.t' record.
/// \param Privates One entry per field of the privates record, in field
/// order.
/// \return The generated internal-linkage mapping function.
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // First argument: pointer to the privates record itself.
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamDecl::Other);
  Args.push_back(&TaskPrivatesArg);
  // Map each privatized variable to the position of its output-pointer
  // argument in Args (positions start at 1, after TaskPrivatesArg).
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  // Private locals mirror the field-type adjustments done when the privates
  // record was built (pointer for lvalue refs and allocatable decls).
  for (const VarDecl *VD : Data.PrivateLocals) {
    QualType Ty = VD->getType().getNonReferenceType();
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
        ImplicitParamDecl::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  // This trivial mapping function is a prime inlining candidate when
  // optimizing; strip the -O0 attributes and force-inline it.
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  // Walk the record fields in order; Privates[Counter] describes the same
  // variable as the Counter-th field, and PrivateVarsPos yields the matching
  // output argument.
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}
3841 
/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Address of the shareds block to read
/// firstprivate originals from; may be invalid when not required.
/// \param TDBase LValue of the kmp_task_t_with_privates object whose
/// privates record is being initialized.
/// \param Privates One entry per field of the privates record, in field
/// order.
/// \param ForDup true when emitting from the taskloop task_dup function,
/// false for the initial task allocation.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  // Re-point FI at the first field of the privates record; it advances in
  // lockstep with the Privates array below.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the dup function only non-trivial constructor initializers need to
    // be (re-)run; trivial ones were handled at allocation time.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // A PrivateElemInit marks a firstprivate: copy/construct from the
      // original (shared) value instead of default-initializing.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the original value through the source task's shareds block.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress(CGF).withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          // Lambda/block captures can be emitted directly.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(Elem, SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/class firstprivate: bind Elem to the shared value and run
          // the initializer expression.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private: just run the declared initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3961 
3962 /// Check if duplication function is required for taskloops.
3963 static bool checkInitIsRequired(CodeGenFunction &CGF,
3964                                 ArrayRef<PrivateDataTy> Privates) {
3965   bool InitRequired = false;
3966   for (const PrivateDataTy &Pair : Privates) {
3967     if (Pair.second.isLocalPrivate())
3968       continue;
3969     const VarDecl *VD = Pair.second.PrivateCopy;
3970     const Expr *Init = VD->getAnyInitializer();
3971     InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3972                                     !CGF.isTrivialInitializer(Init));
3973     if (InitRequired)
3974       break;
3975   }
3976   return InitRequired;
3977 }
3978 
3979 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
/// \param WithLastIter true when the taskloop has lastprivates, so the liter
/// flag must be copied into the destination task.
/// \return The generated internal-linkage task_dup function.
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Signature: (task_t_with_privates *dst, task_t_with_privates *src,
  //             int lastpriv).
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  // Firstprivates are copied from the source task's shareds block; load its
  // address from task_src.
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address::deprecated(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4058 
4059 /// Checks if destructor function is required to be generated.
4060 /// \return true if cleanups are required, false otherwise.
4061 static bool
4062 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4063                          ArrayRef<PrivateDataTy> Privates) {
4064   for (const PrivateDataTy &P : Privates) {
4065     if (P.second.isLocalPrivate())
4066       continue;
4067     QualType Ty = P.second.Original->getType().getNonReferenceType();
4068     if (Ty.isDestructedType())
4069       return true;
4070   }
4071   return false;
4072 }
4073 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// RAII helper: the constructor emits the loop preamble for every iterator in
/// \p E (privatized iterator variable, zeroed counter, bounds check, body
/// entry), and the destructor emits the matching loop tails (counter
/// increment, back-branch, exit block) in reverse order, so code emitted
/// while the scope is alive becomes the innermost loop body.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // One continue/exit destination pair per iterator; filled by the
  // constructor and consumed (in reverse) by the destructor.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// Emits the header of one loop nest per iterator; a null \p E makes the
  /// scope a no-op.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    // First pass: evaluate all upper bounds and privatize the iterator and
    // counter variables before any loop blocks are emitted.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    // Second pass: emit the actual loop headers, outermost first.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Comparison signedness follows the counter variable's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Closes the loops opened by the constructor, innermost first.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit: (only the outermost exit finishes the insertion point)
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4149 
/// Returns the base address of \p E and the size, in bytes, of the storage it
/// designates. Array-shaping expressions multiply the element size by every
/// dimension; array sections compute (one-past-upper-bound - base); anything
/// else uses sizeof the expression's type.
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    // For a shaping expression the base itself is already a pointer value.
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    // Size = sizeof(element) * dim0 * dim1 * ...
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    // For a section, take the address one element past the upper bound and
    // subtract the base address to get the byte length.
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}
4185 
/// Builds the kmp_task_affinity_info_t record type, if it is not built yet:
///   struct kmp_task_affinity_info_t { intptr_t base_addr; size_t len;
///                                     uint32_t flags; };
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}
4200 
/// Emits the task-creation sequence for \p D: builds the kmp_task_t-with-
/// privates record, the proxy entry/dup/destructor helpers, allocates the
/// task via the runtime, and fills in shareds, privates, detach event,
/// affinity info, destructors and priority.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  // private vars: no element initializer.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // firstprivate vars: carry the element initializer as well.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  // lastprivate vars: no element initializer.
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Local privates; allocatable ones are stored as pointers.
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Descending alignment; stable so equally-aligned privates keep their
  // relative source order.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use a
  // different (cached) record than plain task/target directives.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map parameter is the 4th argument of the outlined task
  // function; reuse its exact LLVM type for the cast below.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // 'final' may be a runtime expression (pointer set) or a compile-time
  // constant (int flag of the PointerIntPair).
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // A nowait directive uses the target-aware allocator, which takes an
    // extra device-id argument.
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // Iterator modifiers contribute a runtime count (product of the
    // iterators' upper bounds); plain lists contribute a static count.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime count: emit a VLA of affinity records.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Static count: emit a constant-sized array.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // The iterator-driven entries continue from the static position, tracked
    // in a runtime counter since their count is not known at compile time.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address::deprecated(
        CGF.EmitLoadOfScalar(
            CGF.EmitLValueForField(
                TDBase,
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops additionally need a task-duplication helper when privates
    // must be (re)initialized in each generated task.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4588 
namespace {
/// Dependence kind for RTL. Values match the flag encoding expected by the
/// libomp runtime in the kmp_depend_info flags field.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4,
  DepInOutSet = 0x8
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4600 
4601 /// Translates internal dependency kind into the runtime kind.
4602 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4603   RTLDependenceKindTy DepKind;
4604   switch (K) {
4605   case OMPC_DEPEND_in:
4606     DepKind = DepIn;
4607     break;
4608   // Out and InOut dependencies must use the same code.
4609   case OMPC_DEPEND_out:
4610   case OMPC_DEPEND_inout:
4611     DepKind = DepInOut;
4612     break;
4613   case OMPC_DEPEND_mutexinoutset:
4614     DepKind = DepMutexInOutSet;
4615     break;
4616   case OMPC_DEPEND_inoutset:
4617     DepKind = DepInOutSet;
4618     break;
4619   case OMPC_DEPEND_source:
4620   case OMPC_DEPEND_sink:
4621   case OMPC_DEPEND_depobj:
4622   case OMPC_DEPEND_unknown:
4623     llvm_unreachable("Unknown task dependence type");
4624   }
4625   return DepKind;
4626 }
4627 
4628 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4629 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4630                            QualType &FlagsTy) {
4631   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4632   if (KmpDependInfoTy.isNull()) {
4633     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4634     KmpDependInfoRD->startDefinition();
4635     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4636     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4637     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4638     KmpDependInfoRD->completeDefinition();
4639     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4640   }
4641 }
4642 
/// Returns the number of dependencies stored in a depobj and an lvalue for
/// the first kmp_depend_info element of its array. The element count is
/// stashed by the runtime in the base_addr field of the record at index -1,
/// immediately before the dependency array itself.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // The depobj variable holds a void* pointing at the dependency array.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  // Reinterpret that pointer as kmp_depend_info*.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
      CGF.ConvertTypeForMem(KmpDependInfoTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Step back one record to reach the hidden header element.
  Address DepObjAddr = CGF.Builder.CreateGEP(
      Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4670 
/// Fills consecutive kmp_depend_info records in \p DependenciesArray from the
/// dependency expressions in \p Data. \p Pos is either a compile-time index
/// (unsigned*) advanced in place, or — when the clause has an iterator
/// modifier and the count is only known at runtime — an LValue holding a
/// runtime counter that is loaded and stored back on each element.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // If an iterator modifier is present, everything below is emitted inside
  // the generated iterator loop nest.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Static index: constant GEP.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Runtime index: load the counter and GEP by it.
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position, matching whichever representation is in use.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4727 
/// For each 'depobj' dependency expression in \p Data, emits a load of the
/// number of kmp_depend_info records stored in that depobj. The count is kept
/// in the base_addr field of the element at index -1 of the depobj array (see
/// emitDepobjDependClause, which writes it there). Returns one runtime size
/// value per dependency expression.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    // The loads happen inside an (optional) iterator scope; each count is
    // stashed in a stack temporary so it can be read back after the scope
    // has been closed.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      // Dereference the depobj handle to get the kmp_depend_info array.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT,
          CGF.ConvertTypeForMem(KmpDependInfoTy));
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // The element count lives one element before the visible array.
      Address DepObjAddr = CGF.Builder.CreateGEP(
          Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Stash the count: zero-init the temporary, then load/add/store. The
      // add is effectively just NumDeps (the temporary starts at 0); the
      // redundant sequence is left for the optimizer to clean up.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress(CGF));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Read the stashed sizes back, outside the iterator scope.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4784 
/// Copies the kmp_depend_info records of every 'depobj' dependency in
/// \p Data into \p DependenciesArray, starting at the runtime position held
/// in \p PosLVal, and advances that position by the number of records copied
/// from each depobj.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    // Emit the (optional) iterator loop(s) around the per-depobj copies.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      // Dereference the depobj handle to get the kmp_depend_info array.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT,
          CGF.ConvertTypeForMem(KmpDependInfoTy));
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      // The count lives in the base_addr field one element before the array.
      Address DepObjAddr = CGF.Builder.CreateGEP(
          Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data.
      // Size in bytes = NumDeps * sizeof(kmp_depend_info).
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4842 
/// Emits the kmp_depend_info array for all 'depend' clauses of a task and
/// returns {number of elements, array address cast to void*}. Array layout:
/// regular dependencies without iterators first, then regular dependencies
/// with iterators, then the records copied out of depobj dependencies.
/// Returns {nullptr, invalid} when every clause's expression list is empty.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  // Nothing to emit if all dependency lists are empty.
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Count, at compile time, the regular dependencies that have no iterator
  // modifier; depobj and iterator-based counts are only known at runtime.
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependecies and regular deps with the iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.

    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        // Each iteration contributes one record per dependency expression
        // in the clause.
        llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
            Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Total size is only known at runtime: emit a variable-length array.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    // Wrap the runtime count in an OpaqueValueExpr so it can serve as the
    // size expression of the VLA type.
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // Everything is known at compile time: emit a constant-sized local array.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // Copy regular dependencies without iterators (compile-time positions).
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators (runtime counter, seeded with
  // the compile-time position reached above).
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  // Return the array as void*, as expected by the runtime entry points.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
  return std::make_pair(NumOfElements, DependenciesArray);
}
4966 
/// Emits the dependency array for a standalone 'depobj' construct: allocates
/// (record count + 1) kmp_depend_info elements on dynamic memory via
/// __kmpc_alloc, stores the record count into the base_addr field of element
/// 0 (needed for later 'update'/'destroy' handling), fills the records from
/// element 1 on, and returns the address of element 1 cast to void*.
/// Returns an invalid Address when the dependency list is empty.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Runtime record count: product of all iterator trip counts.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // Allocation size = (count + 1) * aligned sizeof(kmp_depend_info).
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Compile-time count: size of a constant array with one extra element.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address::deprecated(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Records start at index 1. With an iterator modifier the position must be
  // tracked at runtime; otherwise a compile-time index suffices.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return the address of the first real record (element 1) as void*.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
      CGF.Int8Ty);
  return DependenciesArray;
}
5050 
/// Emits destruction of a depobj: steps back from the pointer stored in the
/// depobj variable to the true allocation base (one kmp_depend_info element
/// earlier — the extra element that holds the record count, see
/// emitDepobjDependClause) and releases it via __kmpc_free.
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  // Load the pointer stored in the depobj variable.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
      CGF.ConvertTypeForMem(KmpDependInfoTy));
  // Step back one element to the start of the allocation.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}
5078 
/// Emits 'depobj(x) update(kind)' handling: a pointer-walk loop over every
/// kmp_depend_info record of the depobj array that rewrites each record's
/// flags field to \p NewDepKind.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  // Fetch the record count and the array base from the depobj.
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  // NOTE(review): the body block is entered unconditionally, so at least one
  // record is updated — assumes NumDeps >= 1; confirm for empty depobjs.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI node carrying the current element pointer across iterations.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Begin.withPointer(ElementPHI);
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5125 
/// Emits code for a task construct: initializes the task object via
/// emitTaskInit, emits the dependence array (if any), and then either calls
/// __kmpc_omp_task[_with_deps] (deferred, the 'then' branch of an 'if'
/// clause) or emits an undeferred execution sequence
/// (__kmpc_omp_wait_deps when there are dependences, then
/// __kmpc_omp_task_begin_if0 / proxy task entry / __kmpc_omp_task_complete_if0
/// — the 'else' branch).
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'then' branch: enqueue the task as deferred.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // For untied tasks, reset the part_id counter before the first run.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  // Arguments for __kmpc_omp_wait_deps, used on the undeferred path.
  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // 'else' branch: execute the task undeferred in the encountering thread.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  // With an 'if' clause emit both branches behind a runtime condition;
  // otherwise emit the deferred path only.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5243 
/// Emits a call to __kmpc_taskloop for a taskloop construct: initializes the
/// task object via emitTaskInit, evaluates the 'if' clause, stores the lower
/// bound, upper bound, stride and reductions into the task structure, and
/// passes grainsize/num_tasks scheduling information to the runtime.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // Evaluate the 'if' clause to an int (1 when absent).
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lower bound field of the task structure.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the upper bound field.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the stride field.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Scheduling kind for the 'sched' argument of __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
5329 
5330 /// Emit reduction operation for each element of array (required for
5331 /// array sections) LHS op = RHS.
5332 /// \param Type Type of array.
5333 /// \param LHSVar Variable on the left side of the reduction operation
5334 /// (references element of array in original variable).
5335 /// \param RHSVar Variable on the right side of the reduction operation
5336 /// (references element of array in original variable).
5337 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5338 /// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely when the array turns out to be empty
  // (begin == end); the element count may only be known at run time.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  // Capture the predecessor block before switching to the body; it is the
  // incoming edge for the PHI nodes created below.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent = Address::deprecated(
      RHSElementPHI,
      RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent = Address::deprecated(
      LHSElementPHI,
      LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current element addresses so the
  // generated combiner operates on a single pair of elements.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, LHSElementCurrent);
  Scope.addPrivate(RHSVar, RHSElementCurrent);
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // Take the latch edge from the builder's current block rather than BodyBB:
  // RedOpGen may have emitted intermediate blocks inside the loop body.
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5412 
5413 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5414 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5415 /// UDR combiner function.
5416 static void emitReductionCombiner(CodeGenFunction &CGF,
5417                                   const Expr *ReductionOp) {
5418   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5419     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5420       if (const auto *DRE =
5421               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5422         if (const auto *DRD =
5423                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5424           std::pair<llvm::Function *, llvm::Function *> Reduction =
5425               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5426           RValue Func = RValue::get(Reduction.first);
5427           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5428           CGF.EmitIgnoredExpr(ReductionOp);
5429           return;
5430         }
5431   CGF.EmitIgnoredExpr(ReductionOp);
5432 }
5433 
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  // The function is module-internal; give it a uniqued, stable name.
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS = Address::deprecated(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), ArgsType),
      CGF.getPointerAlign());
  Address RHS = Address::deprecated(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), ArgsType),
      CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap each LHS/RHS variable to the corresponding slot of the argument
  // arrays so the reduction ops emitted below read and write through the
  // caller-provided pointers.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // Variably-modified items occupy an extra array slot holding the
      // dynamic size; consume it here.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      // Bind the VLA size expression to the loaded size so
      // EmitVariablyModifiedType can compute the run-time type.
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5523 
5524 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5525                                                   const Expr *ReductionOp,
5526                                                   const Expr *PrivateRef,
5527                                                   const DeclRefExpr *LHS,
5528                                                   const DeclRefExpr *RHS) {
5529   if (PrivateRef->getType()->isArrayType()) {
5530     // Emit reduction for array section.
5531     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5532     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5533     EmitOMPAggregateReduction(
5534         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5535         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5536           emitReductionCombiner(CGF, ReductionOp);
5537         });
5538   } else {
5539     // Emit reduction for array subscript or single variable.
5540     emitReductionCombiner(CGF, ReductionOp);
5541   }
5542 }
5543 
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  // Nothing to emit when the current insertion point is unreachable.
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // Emit the combiners inline with no runtime library calls.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  // Fill RedList with the addresses of the private (RHS) copies; variably
  // modified items additionally get a slot carrying their dynamic size.
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      // The size is smuggled through the void* slot via inttoptr.
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  // The location ident carries the OMP_ATOMIC_REDUCE flag.
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Region body: apply each combiner to the (LHS, RHS) pair.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // No entry call; the end-reduce runtime call is emitted when the region
  // finishes.
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // Decompose a 'x = <update>' reduction op into X (target lvalue) and
      // UpExpr (update expression); anything else keeps them null and falls
      // back to the critical-section path below.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Recompute the update expression with the LHS variable
                // remapped to a temporary holding the loaded value XRValue.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                CGF.emitOMPSimpleStore(
                    CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                    VD->getType().getNonReferenceType(), Loc);
                PrivateScope.addPrivate(VD, LHSTemp);
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5847 
5848 /// Generates unique name for artificial threadprivate variables.
5849 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5850 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5851                                       const Expr *Ref) {
5852   SmallString<256> Buffer;
5853   llvm::raw_svector_ostream Out(Buffer);
5854   const clang::DeclRefExpr *DE;
5855   const VarDecl *D = ::getBaseDecl(Ref, DE);
5856   if (!D)
5857     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5858   D = D->getCanonicalDecl();
5859   std::string Name = CGM.getOpenMPRuntime().getName(
5860       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5861   Out << Prefix << Name << "_"
5862       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5863   return std::string(Out.str());
5864 }
5865 
5866 /// Emits reduction initializer function:
5867 /// \code
5868 /// void @.red_init(void* %arg, void* %orig) {
5869 /// %0 = bitcast void* %arg to <type>*
5870 /// store <type> <init>, <type>* %0
5871 /// ret void
5872 /// }
5873 /// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // The two pointer arguments never alias; mark them restrict.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %arg holds a pointer to the private reduction item.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer)
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5929 
5930 /// Emits reduction combiner function:
5931 /// \code
5932 /// void @.red_comb(void* %arg0, void* %arg1) {
5933 /// %lhs = bitcast void* %arg0 to <type>*
5934 /// %rhs = bitcast void* %arg1 to <type>*
5935 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5936 /// store <type> %2, <type>* %lhs
5937 /// ret void
5938 /// }
5939 /// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  // %arg0: in-out item, %arg1: in item — both opaque pointers.
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(
      LHSVD,
      // Pull out the pointer to the variable.
      CGF.Builder.CreateElementBitCast(
          CGF.EmitLoadOfPointer(
              CGF.GetAddrOfLocalVar(&ParamInOut),
              C.getPointerType(C.VoidPtrTy).castAs<PointerType>()),
          CGF.ConvertTypeForMem(LHSVD->getType())));
  PrivateScope.addPrivate(
      RHSVD,
      // Pull out the pointer to the variable.
      CGF.Builder.CreateElementBitCast(
          CGF.EmitLoadOfPointer(
              CGF.GetAddrOfLocalVar(&ParamIn),
              C.getPointerType(C.VoidPtrTy).castAs<PointerType>()),
          CGF.ConvertTypeForMem(RHSVD->getType())));
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
6007 
6008 /// Emits reduction finalizer function:
6009 /// \code
6010 /// void @.red_fini(void* %arg) {
6011 /// %0 = bitcast void* %arg to <type>*
6012 /// <destroy>(<type>* %0)
6013 /// ret void
6014 /// }
6015 /// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No finalizer is emitted when the reduction item needs no cleanups.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %arg holds a pointer to the private reduction item to destroy.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
6056 
/// Emits task-reduction registration: builds an array of kmp_taskred_input_t
/// descriptors (one per reduction item, each carrying the shared/original
/// addresses, item size, and init/fini/comb callback pointers) and hands it
/// to __kmpc_taskred_init or, for reductions with a 'task' modifier, to
/// __kmpc_taskred_modifier_init. Returns the taskgroup handle produced by
/// the runtime, or nullptr when there is nothing to emit.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  // Nothing to do for a dead insertion point or when no reductions exist.
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill one descriptor element per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    // The finalizer may be null when the item needs no cleanups.
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    // flags == 1 is stored for items whose size is only known at run time
    // (see DelayedCreation above); all other items get zero-initialized flags.
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6185 
6186 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6187                                             SourceLocation Loc,
6188                                             bool IsWorksharingReduction) {
6189   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6190   // is_ws, int num, void *data);
6191   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6192   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6193                                                 CGM.IntTy, /*isSigned=*/true);
6194   llvm::Value *Args[] = {IdentTLoc, GTid,
6195                          llvm::ConstantInt::get(CGM.IntTy,
6196                                                 IsWorksharingReduction ? 1 : 0,
6197                                                 /*isSigned=*/true)};
6198   (void)CGF.EmitRuntimeCall(
6199       OMPBuilder.getOrCreateRuntimeFunction(
6200           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6201       Args);
6202 }
6203 
6204 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6205                                               SourceLocation Loc,
6206                                               ReductionCodeGen &RCG,
6207                                               unsigned N) {
6208   auto Sizes = RCG.getSizes(N);
6209   // Emit threadprivate global variable if the type is non-constant
6210   // (Sizes.second = nullptr).
6211   if (Sizes.second) {
6212     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6213                                                      /*isSigned=*/false);
6214     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6215         CGF, CGM.getContext().getSizeType(),
6216         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6217     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6218   }
6219 }
6220 
6221 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6222                                               SourceLocation Loc,
6223                                               llvm::Value *ReductionsPtr,
6224                                               LValue SharedLVal) {
6225   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6226   // *d);
6227   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6228                                                    CGM.IntTy,
6229                                                    /*isSigned=*/true),
6230                          ReductionsPtr,
6231                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6232                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6233   return Address::deprecated(
6234       CGF.EmitRuntimeCall(
6235           OMPBuilder.getOrCreateRuntimeFunction(
6236               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6237           Args),
6238       SharedLVal.getAlignment());
6239 }
6240 
6241 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
6242                                        const OMPTaskDataTy &Data) {
6243   if (!CGF.HaveInsertPoint())
6244     return;
6245 
6246   if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
6247     // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
6248     OMPBuilder.createTaskwait(CGF.Builder);
6249   } else {
6250     llvm::Value *ThreadID = getThreadID(CGF, Loc);
6251     llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
6252     auto &M = CGM.getModule();
6253     Address DependenciesArray = Address::invalid();
6254     llvm::Value *NumOfElements;
6255     std::tie(NumOfElements, DependenciesArray) =
6256         emitDependClause(CGF, Data.Dependences, Loc);
6257     llvm::Value *DepWaitTaskArgs[6];
6258     if (!Data.Dependences.empty()) {
6259       DepWaitTaskArgs[0] = UpLoc;
6260       DepWaitTaskArgs[1] = ThreadID;
6261       DepWaitTaskArgs[2] = NumOfElements;
6262       DepWaitTaskArgs[3] = DependenciesArray.getPointer();
6263       DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
6264       DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6265 
6266       CodeGenFunction::RunCleanupsScope LocalScope(CGF);
6267 
6268       // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
6269       // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
6270       // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
6271       // is specified.
6272       CGF.EmitRuntimeCall(
6273           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
6274           DepWaitTaskArgs);
6275 
6276     } else {
6277 
6278       // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6279       // global_tid);
6280       llvm::Value *Args[] = {UpLoc, ThreadID};
6281       // Ignore return result until untied tasks are supported.
6282       CGF.EmitRuntimeCall(
6283           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
6284           Args);
6285     }
6286   }
6287 
6288   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6289     Region->emitUntiedSwitch(CGF);
6290 }
6291 
6292 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6293                                            OpenMPDirectiveKind InnerKind,
6294                                            const RegionCodeGenTy &CodeGen,
6295                                            bool HasCancel) {
6296   if (!CGF.HaveInsertPoint())
6297     return;
6298   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6299                                  InnerKind != OMPD_critical &&
6300                                      InnerKind != OMPD_master &&
6301                                      InnerKind != OMPD_masked);
6302   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6303 }
6304 
namespace {
/// Cancellation kinds forwarded verbatim as the 'cncl_kind' argument of the
/// __kmpc_cancel / __kmpc_cancellationpoint runtime calls below.
enum RTCancelKind {
  CancelNoreq = 0,     // no cancellation requested
  CancelParallel = 1,  // cancel a 'parallel' region
  CancelLoop = 2,      // cancel a worksharing loop ('for')
  CancelSections = 3,  // cancel a 'sections' region
  CancelTaskgroup = 4  // cancel a 'taskgroup' region
};
} // anonymous namespace
6314 
6315 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6316   RTCancelKind CancelKind = CancelNoreq;
6317   if (CancelRegion == OMPD_parallel)
6318     CancelKind = CancelParallel;
6319   else if (CancelRegion == OMPD_for)
6320     CancelKind = CancelLoop;
6321   else if (CancelRegion == OMPD_sections)
6322     CancelKind = CancelSections;
6323   else {
6324     assert(CancelRegion == OMPD_taskgroup);
6325     CancelKind = CancelTaskgroup;
6326   }
6327   return CancelKind;
6328 }
6329 
6330 void CGOpenMPRuntime::emitCancellationPointCall(
6331     CodeGenFunction &CGF, SourceLocation Loc,
6332     OpenMPDirectiveKind CancelRegion) {
6333   if (!CGF.HaveInsertPoint())
6334     return;
6335   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6336   // global_tid, kmp_int32 cncl_kind);
6337   if (auto *OMPRegionInfo =
6338           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6339     // For 'cancellation point taskgroup', the task region info may not have a
6340     // cancel. This may instead happen in another adjacent task.
6341     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6342       llvm::Value *Args[] = {
6343           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6344           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6345       // Ignore return result until untied tasks are supported.
6346       llvm::Value *Result = CGF.EmitRuntimeCall(
6347           OMPBuilder.getOrCreateRuntimeFunction(
6348               CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6349           Args);
6350       // if (__kmpc_cancellationpoint()) {
6351       //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6352       //   exit from construct;
6353       // }
6354       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6355       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6356       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6357       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6358       CGF.EmitBlock(ExitBB);
6359       if (CancelRegion == OMPD_parallel)
6360         emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6361       // exit from construct;
6362       CodeGenFunction::JumpDest CancelDest =
6363           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6364       CGF.EmitBranchThroughCleanup(CancelDest);
6365       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6366     }
6367   }
6368 }
6369 
/// Emits code for '#pragma omp cancel': calls __kmpc_cancel and, when the
/// runtime reports an active cancellation (non-zero result), branches out of
/// the cancelled construct, emitting a cancel barrier first for parallel
/// regions. When an 'if' clause is present (\p IfCond), the whole sequence
/// is guarded by that condition.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // ThenGen emits the actual cancel call and conditional exit; it is either
    // wrapped in an 'if' clause check below or invoked unconditionally.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // 'if' clause present: emit the cancel only when the condition holds.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6415 
6416 namespace {
6417 /// Cleanup action for uses_allocators support.
6418 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6419   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6420 
6421 public:
6422   OMPUsesAllocatorsActionTy(
6423       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6424       : Allocators(Allocators) {}
6425   void Enter(CodeGenFunction &CGF) override {
6426     if (!CGF.HaveInsertPoint())
6427       return;
6428     for (const auto &AllocatorData : Allocators) {
6429       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6430           CGF, AllocatorData.first, AllocatorData.second);
6431     }
6432   }
6433   void Exit(CodeGenFunction &CGF) override {
6434     if (!CGF.HaveInsertPoint())
6435       return;
6436     for (const auto &AllocatorData : Allocators) {
6437       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6438                                                         AllocatorData.first);
6439     }
6440   }
6441 };
6442 } // namespace
6443 
6444 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6445     const OMPExecutableDirective &D, StringRef ParentName,
6446     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6447     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6448   assert(!ParentName.empty() && "Invalid target region parent name!");
6449   HasEmittedTargetRegion = true;
6450   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6451   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6452     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6453       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6454       if (!D.AllocatorTraits)
6455         continue;
6456       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6457     }
6458   }
6459   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6460   CodeGen.setAction(UsesAllocatorAction);
6461   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6462                                    IsOffloadEntry, CodeGen);
6463 }
6464 
/// Initializes one allocator from a 'uses_allocators' clause:
///   <allocator> = __kmpc_init_allocator(gtid, /*memspace=*/nullptr,
///                                       <num_traits>, <traits>);
/// Also emits the allocator variable itself before storing the handle.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Number of traits comes from the constant array bound of the traits
  // expression's type.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  // Reinterpret the traits array address as void** and load it, so the
  // runtime receives a type-erased pointer to the traits.
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  // Emit the allocator variable declared by the clause, then convert the
  // runtime's void* handle to the variable's type and store it.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6499 
6500 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6501                                              const Expr *Allocator) {
6502   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6503   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6504   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6505   llvm::Value *AllocatorVal =
6506       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6507   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6508                                           CGF.getContext().VoidPtrTy,
6509                                           Allocator->getExprLoc());
6510   (void)CGF.EmitRuntimeCall(
6511       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6512                                             OMPRTL___kmpc_destroy_allocator),
6513       {ThreadId, AllocatorVal});
6514 }
6515 
/// Outlines the target region of \p D into a function named
/// __omp_offloading_<device>_<file>_<parent>_l<line> and, when it is an
/// offload entry, registers it with the offload entry table together with a
/// region ID the runtime uses to launch/identify the kernel.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  // Skip building the host-fallback body when offloading is mandatory on the
  // host; on the device the outlined function is always built.
  const bool BuildOutlinedFn = CGM.getLangOpts().OpenMPIsDevice ||
                               !CGM.getLangOpts().OpenMPOffloadMandatory;
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  if (BuildOutlinedFn)
    OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    // AMDGPU device entry points must use the kernel calling convention.
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // On the host, a one-byte constant global serves as the unique region ID.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // If we do not allow host fallback we still need a named address to use.
  llvm::Constant *TargetRegionEntryAddr = OutlinedFn;
  if (!BuildOutlinedFn) {
    assert(!CGM.getModule().getGlobalVariable(EntryFnName, true) &&
           "Named kernel already exists?");
    TargetRegionEntryAddr = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::InternalLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), EntryFnName);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, TargetRegionEntryAddr, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);

  // Add NumTeams and ThreadLimit attributes to the outlined GPU function
  int32_t DefaultValTeams = -1;
  getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
  if (DefaultValTeams > 0 && OutlinedFn) {
    OutlinedFn->addFnAttr("omp_target_num_teams",
                          std::to_string(DefaultValTeams));
  }
  int32_t DefaultValThreads = -1;
  getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
  if (DefaultValThreads > 0 && OutlinedFn) {
    OutlinedFn->addFnAttr("omp_target_thread_limit",
                          std::to_string(DefaultValThreads));
  }

  if (BuildOutlinedFn)
    CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
}
6615 
6616 /// Checks if the expression is constant or does not have non-trivial function
6617 /// calls.
6618 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6619   // We can skip constant expressions.
6620   // We can skip expressions with trivial calls or simple expressions.
6621   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6622           !E->hasNonTrivialCall(Ctx)) &&
6623          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6624 }
6625 
/// Returns the single meaningful statement nested (possibly through several
/// levels of compound statements) in \p Body, or nullptr when there is none
/// or more than one. Trivial expressions, asm/null statements, a few
/// stand-alone OpenMP directives and declarations without runtime effect are
/// ignored while searching.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  // Descend through nested compound statements, looking for exactly one
  // non-ignorable child at each level.
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      // A DeclStmt is ignorable when every declaration in it is either a
      // non-variable entity or a variable that is global/unused.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6667 
/// Returns the 'num_teams' clause expression that applies to the given
/// target-based directive, or null if the team count is not controlled by an
/// expression.  \p DefaultVal is updated as follows:
///  - set to the clause value when it is an integer constant expression;
///  - 1 for combined target+parallel/simd forms (a single team is used);
///  - 0 when a teams construct is present but carries no num_teams clause;
///  - -1 for a plain 'target' with no nested directive (callers use this to
///    detect that no teams region needs to be emitted, per the comment below).
const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    // For a plain 'target', inspect the (single) directive nested directly
    // inside the captured statement, if any.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(CGF.getContext()))
              DefaultVal = Constant->getExtValue();
          return NumTeams;
        }
        // Nested teams without a num_teams clause: runtime default (0).
        DefaultVal = 0;
        return nullptr;
      }
      // NOTE(review): this branch and the fallthrough below produce the same
      // result; the explicit check only documents the parallel/simd case.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
        DefaultVal = 1;
        return nullptr;
      }
      DefaultVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region
    DefaultVal = -1;
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined forms carry the num_teams clause on the directive itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return NumTeams;
    }
    DefaultVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // These forms execute with a single team.
    DefaultVal = 1;
    return nullptr;
  // All remaining directive kinds are not target execution directives and are
  // rejected by the assertion above; they are listed to keep the switch
  // exhaustive.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6796 
6797 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6798     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6799   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6800          "Clauses associated with the teams directive expected to be emitted "
6801          "only for the host!");
6802   CGBuilderTy &Bld = CGF.Builder;
6803   int32_t DefaultNT = -1;
6804   const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
6805   if (NumTeams != nullptr) {
6806     OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6807 
6808     switch (DirectiveKind) {
6809     case OMPD_target: {
6810       const auto *CS = D.getInnermostCapturedStmt();
6811       CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6812       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6813       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6814                                                   /*IgnoreResultAssign*/ true);
6815       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6816                              /*isSigned=*/true);
6817     }
6818     case OMPD_target_teams:
6819     case OMPD_target_teams_distribute:
6820     case OMPD_target_teams_distribute_simd:
6821     case OMPD_target_teams_distribute_parallel_for:
6822     case OMPD_target_teams_distribute_parallel_for_simd: {
6823       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6824       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6825                                                   /*IgnoreResultAssign*/ true);
6826       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6827                              /*isSigned=*/true);
6828     }
6829     default:
6830       break;
6831     }
6832   } else if (DefaultNT == -1) {
6833     return nullptr;
6834   }
6835 
6836   return Bld.getInt32(DefaultNT);
6837 }
6838 
/// Computes the thread count implied by a directive nested directly inside
/// the captured statement \p CS, emitting clause expressions as needed.
///
/// For a nested 'parallel' the result follows
///   <cond> ? (<numthreads> ? <numthreads> : 0) : 1
/// with <numthreads> clamped to \p DefaultThreadLimitVal when that is
/// non-null.  A nested simd directive yields i32 1.  Any other nested
/// directive yields \p DefaultThreadLimitVal as-is (possibly null); no nested
/// directive yields \p DefaultThreadLimitVal or i32 0 when it is null.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        // Pick the 'if' clause that applies to the parallel region: either an
        // unmodified one or one with the 'parallel' name modifier.
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Statically false condition: the region runs with one thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit the clause's pre-init declarations so the condition can
            // reference them.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  // Capture-no-init: allocate storage only, no initializer.
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Emit the clause's pre-init declarations (same pattern as above).
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp num_threads to the enclosing thread limit (unsigned min).
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A simd region executes with a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6930 
/// Returns the expression that bounds the number of threads for the given
/// target-based directive, or null when none applies.  When the relevant
/// clause expression is an integer constant, \p DefaultVal is set to it; for
/// the simd-only combined forms it is set to 1.  Otherwise \p DefaultVal is
/// left unchanged.
const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  switch (DirectiveKind) {
  case OMPD_target:
    // Teams have no clause thread_limit
    return nullptr;
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return ThreadLimit;
    }
    return nullptr;
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Both thread_limit and num_threads can bound the thread count; the
    // smaller constant wins.
    Expr *ThreadLimit = nullptr;
    Expr *NumThreads = nullptr;
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      ThreadLimit = ThreadLimitClause->getThreadLimit();
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      NumThreads = NumThreadsClause->getNumThreads();
      if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
        if (auto Constant =
                NumThreads->getIntegerConstantExpr(CGF.getContext())) {
          // NOTE(review): when no thread_limit clause was present this
          // compares against the caller-provided DefaultVal — confirm callers
          // pre-initialize it to a meaningful upper bound.
          if (Constant->getExtValue() < DefaultVal) {
            DefaultVal = Constant->getExtValue();
            ThreadLimit = NumThreads;
          }
        }
      }
    }
    return ThreadLimit;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // Simd-only combined forms always run with one thread.
    DefaultVal = 1;
    return nullptr;
  // All remaining directive kinds are rejected by the assertion above; they
  // are listed to keep the switch exhaustive.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7052 
/// Emits the number of threads for the given target directive as an i32
/// value, combining the directive's own 'thread_limit'/'num_threads'/'if'
/// clauses with those of directives nested inside its captured statement.
/// Must only be called on the host (see assertion below).
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': derive the count from whatever directive is nested
    // directly inside the captured statement.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // A thread_limit clause on the nested directive bounds the count; its
      // expression is emitted in the context of the captured statement.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit the clause's pre-init declarations first.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              // Capture-no-init: allocate storage only, no initializer.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // Descend through a teams (non-distribute) directive to look at what is
      // nested inside it.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // A simd region runs with a single thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    // Look one level deeper through a nested 'distribute'.
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false condition: run with a single thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Use the smaller of num_threads and thread_limit (unsigned compare).
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // Simd-only combined forms always run with one thread.
    return Bld.getInt32(1);
  // All remaining directive kinds are rejected by the assertion above; they
  // are listed to keep the switch exhaustive.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7271 
7272 namespace {
7273 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7274 
7275 // Utility to handle information from clauses associated with a given
7276 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7277 // It provides a convenient interface to obtain the information and generate
7278 // code for that information.
7279 class MappableExprsHandler {
7280 public:
7281   /// Values for bit flags used to specify the mapping type for
7282   /// offloading.
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  /// NOTE(review): these values are presumably mirrored by the device
  /// runtime's map-type flags — keep in sync with the runtime when changing.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// Increment and decrement a separate reference counter so that the data
    /// cannot be unmapped within the associated region.  Thus, this flag is
    /// intended to be used on 'target' and 'target data' directives because
    /// they are inherently structured.  It is not intended to be used on
    /// 'target enter data' and 'target exit data' directives because they are
    /// inherently dynamic.
    /// This is an OpenMP extension for the sake of OpenACC support.
    OMP_MAP_OMPX_HOLD = 0x2000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7336 
7337   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7338   static unsigned getFlagMemberOffset() {
7339     unsigned Offset = 0;
7340     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7341          Remain = Remain >> 1)
7342       Offset++;
7343     return Offset;
7344   }
7345 
  /// Class that holds debugging information for a data mapping to be passed to
  /// the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    /// \p MapExpr defaults to null for mappings with no associated clause
    /// expression.
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };
7362 
  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    /// Dereference yields the underlying base pointer value.
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };
7379 
7380   using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
7381   using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7382   using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7383   using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7384   using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
7385   using MapDimArrayTy = SmallVector<uint64_t, 4>;
7386   using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7387 
  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types, user-defined
  /// mappers, and non-contiguous information.
  /// All the parallel arrays (Exprs, BasePointers, Pointers, Sizes, Types,
  /// Mappers) describe one map entry per index.
  struct MapCombinedInfoTy {
    /// Per-entry data describing non-contiguous mappings.
    struct StructNonContiguousInfo {
      bool IsNonContiguous = false;
      MapDimArrayTy Dims;
      MapNonContiguousArrayTy Offsets;
      MapNonContiguousArrayTy Counts;
      MapNonContiguousArrayTy Strides;
    };
    MapExprsArrayTy Exprs;
    MapBaseValuesArrayTy BasePointers;
    MapValuesArrayTy Pointers;
    MapValuesArrayTy Sizes;
    MapFlagsArrayTy Types;
    MapMappersArrayTy Mappers;
    StructNonContiguousInfo NonContigInfo;

    /// Append arrays in \a CurInfo.
    /// NOTE(review): NonContigInfo.IsNonContiguous is NOT merged from
    /// \a CurInfo — confirm callers maintain that flag separately.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      BasePointers.append(CurInfo.BasePointers.begin(),
                          CurInfo.BasePointers.end());
      Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
      Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
      Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
                                 CurInfo.NonContigInfo.Dims.end());
      NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
                                    CurInfo.NonContigInfo.Offsets.end());
      NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
                                   CurInfo.NonContigInfo.Counts.end());
      NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
                                    CurInfo.NonContigInfo.Strides.end());
    }
  };
7426 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Map information gathered for the struct's members, kept until the
    /// combined entry for the whole struct can be emitted.
    MapCombinedInfoTy PreliminaryMapData;
    /// Field index and address of the lowest mapped element of the struct.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Field index and address of the highest mapped element of the struct.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Base address of the struct.
    Address Base = Address::invalid();
    /// Lower-bound address of the mapped range — presumably the start of the
    /// combined entry; confirm against the emission code.
    Address LB = Address::invalid();
    /// True if an array section of the struct is involved in the mapping.
    bool IsArraySection = false;
    /// True if the complete record has been mapped.
    bool HasCompleteRecord = false;
  };
7442 
7443 private:
  /// Information gathered for a single component list of a map/to/from
  /// clause: the expression components themselves, the map type and its
  /// modifiers, the associated user-defined mapper, and whether a device
  /// pointer/address has to be returned for the entry.
  struct MapInfo {
    /// The mappable expression components this information was built from.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// Map type (to/from/tofrom/alloc/release/delete); unknown if unset.
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Modifiers attached to a map clause (always, close, present, ...).
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// Modifiers attached to a to/from motion clause.
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    /// True if the device pointer has to be returned for this entry
    /// (use_device_ptr/use_device_addr handling).
    bool ReturnDevicePointer = false;
    /// True if the map was generated implicitly rather than user-written.
    bool IsImplicit = false;
    /// User-defined mapper associated with this entry, if any.
    const ValueDecl *Mapper = nullptr;
    /// Original variable-reference expression, if any.
    const Expr *VarRef = nullptr;
    /// True when the entry was requested for a device address rather than a
    /// device pointer — NOTE(review): confirm against the callers.
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
7470 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// Expression through which the declaration is referenced.
    const Expr *IE = nullptr;
    /// The declaration whose entry was deferred.
    const ValueDecl *VD = nullptr;
    /// Distinguishes use_device_addr (true) from use_device_ptr (false).
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7483 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between lambda declarations and the map clause that maps them (the
  /// clause carries the map type to apply).
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7507 
7508   llvm::Value *getExprTypeSize(const Expr *E) const {
7509     QualType ExprTy = E->getType().getCanonicalType();
7510 
7511     // Calculate the size for array shaping expression.
7512     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7513       llvm::Value *Size =
7514           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7515       for (const Expr *SE : OAE->getDimensions()) {
7516         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7517         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7518                                       CGF.getContext().getSizeType(),
7519                                       SE->getExprLoc());
7520         Size = CGF.Builder.CreateNUWMul(Size, Sz);
7521       }
7522       return Size;
7523     }
7524 
7525     // Reference types are ignored for mapping purposes.
7526     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7527       ExprTy = RefTy->getPointeeType().getCanonicalType();
7528 
7529     // Given that an array section is considered a built-in type, we need to
7530     // do the calculation based on the length of the section instead of relying
7531     // on CGF.getTypeSize(E->getType()).
7532     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7533       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7534                             OAE->getBase()->IgnoreParenImpCasts())
7535                             .getCanonicalType();
7536 
7537       // If there is no length associated with the expression and lower bound is
7538       // not specified too, that means we are using the whole length of the
7539       // base.
7540       if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7541           !OAE->getLowerBound())
7542         return CGF.getTypeSize(BaseTy);
7543 
7544       llvm::Value *ElemSize;
7545       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7546         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7547       } else {
7548         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7549         assert(ATy && "Expecting array type if not a pointer type.");
7550         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7551       }
7552 
7553       // If we don't have a length at this point, that is because we have an
7554       // array section with a single element.
7555       if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7556         return ElemSize;
7557 
7558       if (const Expr *LenExpr = OAE->getLength()) {
7559         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7560         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7561                                              CGF.getContext().getSizeType(),
7562                                              LenExpr->getExprLoc());
7563         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7564       }
7565       assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7566              OAE->getLowerBound() && "expected array_section[lb:].");
7567       // Size = sizetype - lb * elemtype;
7568       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7569       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7570       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7571                                        CGF.getContext().getSizeType(),
7572                                        OAE->getLowerBound()->getExprLoc());
7573       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7574       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7575       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7576       LengthVal = CGF.Builder.CreateSelect(
7577           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7578       return LengthVal;
7579     }
7580     return CGF.getTypeSize(ExprTy);
7581   }
7582 
7583   /// Return the corresponding bits for a given map clause modifier. Add
7584   /// a flag marking the map as a pointer if requested. Add a flag marking the
7585   /// map as the first one of a series of maps that relate to the same map
7586   /// expression.
7587   OpenMPOffloadMappingFlags getMapTypeBits(
7588       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7589       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7590       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7591     OpenMPOffloadMappingFlags Bits =
7592         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7593     switch (MapType) {
7594     case OMPC_MAP_alloc:
7595     case OMPC_MAP_release:
7596       // alloc and release is the default behavior in the runtime library,  i.e.
7597       // if we don't pass any bits alloc/release that is what the runtime is
7598       // going to do. Therefore, we don't need to signal anything for these two
7599       // type modifiers.
7600       break;
7601     case OMPC_MAP_to:
7602       Bits |= OMP_MAP_TO;
7603       break;
7604     case OMPC_MAP_from:
7605       Bits |= OMP_MAP_FROM;
7606       break;
7607     case OMPC_MAP_tofrom:
7608       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7609       break;
7610     case OMPC_MAP_delete:
7611       Bits |= OMP_MAP_DELETE;
7612       break;
7613     case OMPC_MAP_unknown:
7614       llvm_unreachable("Unexpected map type!");
7615     }
7616     if (AddPtrFlag)
7617       Bits |= OMP_MAP_PTR_AND_OBJ;
7618     if (AddIsTargetParamFlag)
7619       Bits |= OMP_MAP_TARGET_PARAM;
7620     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7621       Bits |= OMP_MAP_ALWAYS;
7622     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7623       Bits |= OMP_MAP_CLOSE;
7624     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7625         llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7626       Bits |= OMP_MAP_PRESENT;
7627     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7628       Bits |= OMP_MAP_OMPX_HOLD;
7629     if (IsNonContiguous)
7630       Bits |= OMP_MAP_NON_CONTIG;
7631     return Bits;
7632   }
7633 
7634   /// Return true if the provided expression is a final array section. A
7635   /// final array section, is one whose length can't be proved to be one.
7636   bool isFinalArraySectionExpression(const Expr *E) const {
7637     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7638 
7639     // It is not an array section and therefore not a unity-size one.
7640     if (!OASE)
7641       return false;
7642 
7643     // An array section with no colon always refer to a single element.
7644     if (OASE->getColonLocFirst().isInvalid())
7645       return false;
7646 
7647     const Expr *Length = OASE->getLength();
7648 
7649     // If we don't have a length we have to check if the array has size 1
7650     // for this dimension. Also, we should always expect a length if the
7651     // base type is pointer.
7652     if (!Length) {
7653       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7654                              OASE->getBase()->IgnoreParenImpCasts())
7655                              .getCanonicalType();
7656       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7657         return ATy->getSize().getSExtValue() != 1;
7658       // If we don't have a constant dimension length, we have to consider
7659       // the current section as having any size, so it is not necessarily
7660       // unitary. If it happen to be unity size, that's user fault.
7661       return true;
7662     }
7663 
7664     // Check if the length evaluates to 1.
7665     Expr::EvalResult Result;
7666     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7667       return true; // Can have more that size 1.
7668 
7669     llvm::APSInt ConstLength = Result.Val.getInt();
7670     return ConstLength.getSExtValue() != 1;
7671   }
7672 
7673   /// Generate the base pointers, section pointers, sizes, map type bits, and
7674   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7675   /// map type, map or motion modifiers, and expression components.
7676   /// \a IsFirstComponent should be set to true if the provided set of
7677   /// components is the first associated with a capture.
7678   void generateInfoForComponentList(
7679       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7680       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7681       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7682       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7683       bool IsFirstComponentList, bool IsImplicit,
7684       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7685       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7686       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7687           OverlappedElements = llvm::None) const {
7688     // The following summarizes what has to be generated for each map and the
7689     // types below. The generated information is expressed in this order:
7690     // base pointer, section pointer, size, flags
7691     // (to add to the ones that come from the map type and modifier).
7692     //
7693     // double d;
7694     // int i[100];
7695     // float *p;
7696     //
7697     // struct S1 {
7698     //   int i;
7699     //   float f[50];
7700     // }
7701     // struct S2 {
7702     //   int i;
7703     //   float f[50];
7704     //   S1 s;
7705     //   double *p;
7706     //   struct S2 *ps;
7707     //   int &ref;
7708     // }
7709     // S2 s;
7710     // S2 *ps;
7711     //
7712     // map(d)
7713     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7714     //
7715     // map(i)
7716     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7717     //
7718     // map(i[1:23])
7719     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7720     //
7721     // map(p)
7722     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7723     //
7724     // map(p[1:24])
7725     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7726     // in unified shared memory mode or for local pointers
7727     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7728     //
7729     // map(s)
7730     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7731     //
7732     // map(s.i)
7733     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7734     //
7735     // map(s.s.f)
7736     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7737     //
7738     // map(s.p)
7739     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7740     //
7741     // map(to: s.p[:22])
7742     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7743     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7744     // &(s.p), &(s.p[0]), 22*sizeof(double),
7745     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7746     // (*) alloc space for struct members, only this is a target parameter
7747     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7748     //      optimizes this entry out, same in the examples below)
7749     // (***) map the pointee (map: to)
7750     //
7751     // map(to: s.ref)
7752     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7753     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7754     // (*) alloc space for struct members, only this is a target parameter
7755     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7756     //      optimizes this entry out, same in the examples below)
7757     // (***) map the pointee (map: to)
7758     //
7759     // map(s.ps)
7760     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7761     //
7762     // map(from: s.ps->s.i)
7763     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7764     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7765     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7766     //
7767     // map(to: s.ps->ps)
7768     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7769     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7770     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7771     //
7772     // map(s.ps->ps->ps)
7773     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7774     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7775     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7776     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7777     //
7778     // map(to: s.ps->ps->s.f[:22])
7779     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7780     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7781     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7782     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7783     //
7784     // map(ps)
7785     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7786     //
7787     // map(ps->i)
7788     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7789     //
7790     // map(ps->s.f)
7791     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7792     //
7793     // map(from: ps->p)
7794     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7795     //
7796     // map(to: ps->p[:22])
7797     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7798     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7799     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7800     //
7801     // map(ps->ps)
7802     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7803     //
7804     // map(from: ps->ps->s.i)
7805     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7806     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7807     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7808     //
7809     // map(from: ps->ps->ps)
7810     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7811     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7812     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7813     //
7814     // map(ps->ps->ps->ps)
7815     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7816     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7817     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7818     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7819     //
7820     // map(to: ps->ps->ps->s.f[:22])
7821     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7822     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7823     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7824     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7825     //
7826     // map(to: s.f[:22]) map(from: s.p[:33])
7827     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7828     //     sizeof(double*) (**), TARGET_PARAM
7829     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7830     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7831     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7832     // (*) allocate contiguous space needed to fit all mapped members even if
7833     //     we allocate space for members not mapped (in this example,
7834     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7835     //     them as well because they fall between &s.f[0] and &s.p)
7836     //
7837     // map(from: s.f[:22]) map(to: ps->p[:33])
7838     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7839     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7840     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7841     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7842     // (*) the struct this entry pertains to is the 2nd element in the list of
7843     //     arguments, hence MEMBER_OF(2)
7844     //
7845     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7846     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7847     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7848     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7849     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7850     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7851     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7852     // (*) the struct this entry pertains to is the 4th element in the list
7853     //     of arguments, hence MEMBER_OF(4)
7854 
7855     // Track if the map information being generated is the first for a capture.
7856     bool IsCaptureFirstInfo = IsFirstComponentList;
7857     // When the variable is on a declare target link or in a to clause with
7858     // unified memory, a reference is needed to hold the host/device address
7859     // of the variable.
7860     bool RequiresReference = false;
7861 
7862     // Scan the components from the base to the complete expression.
7863     auto CI = Components.rbegin();
7864     auto CE = Components.rend();
7865     auto I = CI;
7866 
7867     // Track if the map information being generated is the first for a list of
7868     // components.
7869     bool IsExpressionFirstInfo = true;
7870     bool FirstPointerInComplexData = false;
7871     Address BP = Address::invalid();
7872     const Expr *AssocExpr = I->getAssociatedExpression();
7873     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7874     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7875     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7876 
7877     if (isa<MemberExpr>(AssocExpr)) {
7878       // The base is the 'this' pointer. The content of the pointer is going
7879       // to be the base of the field being mapped.
7880       BP = CGF.LoadCXXThisAddress();
7881     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7882                (OASE &&
7883                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7884       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7885     } else if (OAShE &&
7886                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7887       BP = Address::deprecated(
7888           CGF.EmitScalarExpr(OAShE->getBase()),
7889           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7890     } else {
7891       // The base is the reference to the variable.
7892       // BP = &Var.
7893       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7894       if (const auto *VD =
7895               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7896         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7897                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7898           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7899               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7900                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7901             RequiresReference = true;
7902             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7903           }
7904         }
7905       }
7906 
7907       // If the variable is a pointer and is being dereferenced (i.e. is not
7908       // the last component), the base has to be the pointer itself, not its
7909       // reference. References are ignored for mapping purposes.
7910       QualType Ty =
7911           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7912       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7913         // No need to generate individual map information for the pointer, it
7914         // can be associated with the combined storage if shared memory mode is
7915         // active or the base declaration is not global variable.
7916         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7917         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7918             !VD || VD->hasLocalStorage())
7919           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7920         else
7921           FirstPointerInComplexData = true;
7922         ++I;
7923       }
7924     }
7925 
7926     // Track whether a component of the list should be marked as MEMBER_OF some
7927     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7928     // in a component list should be marked as MEMBER_OF, all subsequent entries
7929     // do not belong to the base struct. E.g.
7930     // struct S2 s;
7931     // s.ps->ps->ps->f[:]
7932     //   (1) (2) (3) (4)
7933     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7934     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7935     // is the pointee of ps(2) which is not member of struct s, so it should not
7936     // be marked as such (it is still PTR_AND_OBJ).
7937     // The variable is initialized to false so that PTR_AND_OBJ entries which
7938     // are not struct members are not considered (e.g. array of pointers to
7939     // data).
7940     bool ShouldBeMemberOf = false;
7941 
7942     // Variable keeping track of whether or not we have encountered a component
7943     // in the component list which is a member expression. Useful when we have a
7944     // pointer or a final array section, in which case it is the previous
7945     // component in the list which tells us whether we have a member expression.
7946     // E.g. X.f[:]
7947     // While processing the final array section "[:]" it is "f" which tells us
7948     // whether we are dealing with a member of a declared struct.
7949     const MemberExpr *EncounteredME = nullptr;
7950 
7951     // Track for the total number of dimension. Start from one for the dummy
7952     // dimension.
7953     uint64_t DimSize = 1;
7954 
7955     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7956     bool IsPrevMemberReference = false;
7957 
7958     for (; I != CE; ++I) {
7959       // If the current component is member of a struct (parent struct) mark it.
7960       if (!EncounteredME) {
7961         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7962         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7963         // as MEMBER_OF the parent struct.
7964         if (EncounteredME) {
7965           ShouldBeMemberOf = true;
7966           // Do not emit as complex pointer if this is actually not array-like
7967           // expression.
7968           if (FirstPointerInComplexData) {
7969             QualType Ty = std::prev(I)
7970                               ->getAssociatedDeclaration()
7971                               ->getType()
7972                               .getNonReferenceType();
7973             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7974             FirstPointerInComplexData = false;
7975           }
7976         }
7977       }
7978 
7979       auto Next = std::next(I);
7980 
7981       // We need to generate the addresses and sizes if this is the last
7982       // component, if the component is a pointer or if it is an array section
7983       // whose length can't be proved to be one. If this is a pointer, it
7984       // becomes the base address for the following components.
7985 
7986       // A final array section, is one whose length can't be proved to be one.
7987       // If the map item is non-contiguous then we don't treat any array section
7988       // as final array section.
7989       bool IsFinalArraySection =
7990           !IsNonContiguous &&
7991           isFinalArraySectionExpression(I->getAssociatedExpression());
7992 
7993       // If we have a declaration for the mapping use that, otherwise use
7994       // the base declaration of the map clause.
7995       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7996                                      ? I->getAssociatedDeclaration()
7997                                      : BaseDecl;
7998       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7999                                                : MapExpr;
8000 
8001       // Get information on whether the element is a pointer. Have to do a
8002       // special treatment for array sections given that they are built-in
8003       // types.
8004       const auto *OASE =
8005           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
8006       const auto *OAShE =
8007           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
8008       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
8009       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
8010       bool IsPointer =
8011           OAShE ||
8012           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
8013                        .getCanonicalType()
8014                        ->isAnyPointerType()) ||
8015           I->getAssociatedExpression()->getType()->isAnyPointerType();
8016       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
8017                                MapDecl &&
8018                                MapDecl->getType()->isLValueReferenceType();
8019       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
8020 
8021       if (OASE)
8022         ++DimSize;
8023 
8024       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
8025           IsFinalArraySection) {
8026         // If this is not the last component, we expect the pointer to be
8027         // associated with an array expression or member expression.
8028         assert((Next == CE ||
8029                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
8030                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
8031                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
8032                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
8033                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
8034                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
8035                "Unexpected expression");
8036 
8037         Address LB = Address::invalid();
8038         Address LowestElem = Address::invalid();
8039         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
8040                                        const MemberExpr *E) {
8041           const Expr *BaseExpr = E->getBase();
8042           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
8043           // scalar.
8044           LValue BaseLV;
8045           if (E->isArrow()) {
8046             LValueBaseInfo BaseInfo;
8047             TBAAAccessInfo TBAAInfo;
8048             Address Addr =
8049                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
8050             QualType PtrTy = BaseExpr->getType()->getPointeeType();
8051             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
8052           } else {
8053             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
8054           }
8055           return BaseLV;
8056         };
8057         if (OAShE) {
8058           LowestElem = LB =
8059               Address::deprecated(CGF.EmitScalarExpr(OAShE->getBase()),
8060                                   CGF.getContext().getTypeAlignInChars(
8061                                       OAShE->getBase()->getType()));
8062         } else if (IsMemberReference) {
8063           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
8064           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8065           LowestElem = CGF.EmitLValueForFieldInitialization(
8066                               BaseLVal, cast<FieldDecl>(MapDecl))
8067                            .getAddress(CGF);
8068           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
8069                    .getAddress(CGF);
8070         } else {
8071           LowestElem = LB =
8072               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
8073                   .getAddress(CGF);
8074         }
8075 
8076         // If this component is a pointer inside the base struct then we don't
8077         // need to create any entry for it - it will be combined with the object
8078         // it is pointing to into a single PTR_AND_OBJ entry.
8079         bool IsMemberPointerOrAddr =
8080             EncounteredME &&
8081             (((IsPointer || ForDeviceAddr) &&
8082               I->getAssociatedExpression() == EncounteredME) ||
8083              (IsPrevMemberReference && !IsPointer) ||
8084              (IsMemberReference && Next != CE &&
8085               !Next->getAssociatedExpression()->getType()->isPointerType()));
8086         if (!OverlappedElements.empty() && Next == CE) {
8087           // Handle base element with the info for overlapped elements.
8088           assert(!PartialStruct.Base.isValid() && "The base element is set.");
8089           assert(!IsPointer &&
8090                  "Unexpected base element with the pointer type.");
8091           // Mark the whole struct as the struct that requires allocation on the
8092           // device.
8093           PartialStruct.LowestElem = {0, LowestElem};
8094           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8095               I->getAssociatedExpression()->getType());
8096           Address HB = CGF.Builder.CreateConstGEP(
8097               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8098                   LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
8099               TypeSize.getQuantity() - 1);
8100           PartialStruct.HighestElem = {
8101               std::numeric_limits<decltype(
8102                   PartialStruct.HighestElem.first)>::max(),
8103               HB};
8104           PartialStruct.Base = BP;
8105           PartialStruct.LB = LB;
8106           assert(
8107               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8108               "Overlapped elements must be used only once for the variable.");
8109           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8110           // Emit data for non-overlapped data.
8111           OpenMPOffloadMappingFlags Flags =
8112               OMP_MAP_MEMBER_OF |
8113               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8114                              /*AddPtrFlag=*/false,
8115                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8116           llvm::Value *Size = nullptr;
8117           // Do bitcopy of all non-overlapped structure elements.
8118           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8119                    Component : OverlappedElements) {
8120             Address ComponentLB = Address::invalid();
8121             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8122                  Component) {
8123               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8124                 const auto *FD = dyn_cast<FieldDecl>(VD);
8125                 if (FD && FD->getType()->isLValueReferenceType()) {
8126                   const auto *ME =
8127                       cast<MemberExpr>(MC.getAssociatedExpression());
8128                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8129                   ComponentLB =
8130                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
8131                           .getAddress(CGF);
8132                 } else {
8133                   ComponentLB =
8134                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
8135                           .getAddress(CGF);
8136                 }
8137                 Size = CGF.Builder.CreatePtrDiff(
8138                     CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
8139                     CGF.EmitCastToVoidPtr(LB.getPointer()));
8140                 break;
8141               }
8142             }
8143             assert(Size && "Failed to determine structure size");
8144             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8145             CombinedInfo.BasePointers.push_back(BP.getPointer());
8146             CombinedInfo.Pointers.push_back(LB.getPointer());
8147             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8148                 Size, CGF.Int64Ty, /*isSigned=*/true));
8149             CombinedInfo.Types.push_back(Flags);
8150             CombinedInfo.Mappers.push_back(nullptr);
8151             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8152                                                                       : 1);
8153             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
8154           }
8155           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8156           CombinedInfo.BasePointers.push_back(BP.getPointer());
8157           CombinedInfo.Pointers.push_back(LB.getPointer());
8158           Size = CGF.Builder.CreatePtrDiff(
8159               CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
8160               CGF.EmitCastToVoidPtr(LB.getPointer()));
8161           CombinedInfo.Sizes.push_back(
8162               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8163           CombinedInfo.Types.push_back(Flags);
8164           CombinedInfo.Mappers.push_back(nullptr);
8165           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8166                                                                     : 1);
8167           break;
8168         }
8169         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8170         if (!IsMemberPointerOrAddr ||
8171             (Next == CE && MapType != OMPC_MAP_unknown)) {
8172           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8173           CombinedInfo.BasePointers.push_back(BP.getPointer());
8174           CombinedInfo.Pointers.push_back(LB.getPointer());
8175           CombinedInfo.Sizes.push_back(
8176               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8177           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8178                                                                     : 1);
8179 
8180           // If Mapper is valid, the last component inherits the mapper.
8181           bool HasMapper = Mapper && Next == CE;
8182           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8183 
8184           // We need to add a pointer flag for each map that comes from the
8185           // same expression except for the first one. We also need to signal
8186           // this map is the first one that relates with the current capture
8187           // (there is a set of entries for each capture).
8188           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8189               MapType, MapModifiers, MotionModifiers, IsImplicit,
8190               !IsExpressionFirstInfo || RequiresReference ||
8191                   FirstPointerInComplexData || IsMemberReference,
8192               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8193 
8194           if (!IsExpressionFirstInfo || IsMemberReference) {
8195             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8196             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8197             if (IsPointer || (IsMemberReference && Next != CE))
8198               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
8199                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
8200 
8201             if (ShouldBeMemberOf) {
8202               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8203               // should be later updated with the correct value of MEMBER_OF.
8204               Flags |= OMP_MAP_MEMBER_OF;
8205               // From now on, all subsequent PTR_AND_OBJ entries should not be
8206               // marked as MEMBER_OF.
8207               ShouldBeMemberOf = false;
8208             }
8209           }
8210 
8211           CombinedInfo.Types.push_back(Flags);
8212         }
8213 
8214         // If we have encountered a member expression so far, keep track of the
8215         // mapped member. If the parent is "*this", then the value declaration
8216         // is nullptr.
8217         if (EncounteredME) {
8218           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8219           unsigned FieldIndex = FD->getFieldIndex();
8220 
8221           // Update info about the lowest and highest elements for this struct
8222           if (!PartialStruct.Base.isValid()) {
8223             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8224             if (IsFinalArraySection) {
8225               Address HB =
8226                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
8227                       .getAddress(CGF);
8228               PartialStruct.HighestElem = {FieldIndex, HB};
8229             } else {
8230               PartialStruct.HighestElem = {FieldIndex, LowestElem};
8231             }
8232             PartialStruct.Base = BP;
8233             PartialStruct.LB = BP;
8234           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8235             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8236           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8237             PartialStruct.HighestElem = {FieldIndex, LowestElem};
8238           }
8239         }
8240 
8241         // Need to emit combined struct for array sections.
8242         if (IsFinalArraySection || IsNonContiguous)
8243           PartialStruct.IsArraySection = true;
8244 
8245         // If we have a final array section, we are done with this expression.
8246         if (IsFinalArraySection)
8247           break;
8248 
8249         // The pointer becomes the base for the next element.
8250         if (Next != CE)
8251           BP = IsMemberReference ? LowestElem : LB;
8252 
8253         IsExpressionFirstInfo = false;
8254         IsCaptureFirstInfo = false;
8255         FirstPointerInComplexData = false;
8256         IsPrevMemberReference = IsMemberReference;
8257       } else if (FirstPointerInComplexData) {
8258         QualType Ty = Components.rbegin()
8259                           ->getAssociatedDeclaration()
8260                           ->getType()
8261                           .getNonReferenceType();
8262         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8263         FirstPointerInComplexData = false;
8264       }
8265     }
8266     // If ran into the whole component - allocate the space for the whole
8267     // record.
8268     if (!EncounteredME)
8269       PartialStruct.HasCompleteRecord = true;
8270 
8271     if (!IsNonContiguous)
8272       return;
8273 
8274     const ASTContext &Context = CGF.getContext();
8275 
8276     // For supporting stride in array section, we need to initialize the first
8277     // dimension size as 1, first offset as 0, and first count as 1
8278     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8279     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8280     MapValuesArrayTy CurStrides;
8281     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8282     uint64_t ElementTypeSize;
8283 
8284     // Collect Size information for each dimension and get the element size as
8285     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
8286     // should be [10, 10] and the first stride is 4 btyes.
8287     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8288          Components) {
8289       const Expr *AssocExpr = Component.getAssociatedExpression();
8290       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8291 
8292       if (!OASE)
8293         continue;
8294 
8295       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8296       auto *CAT = Context.getAsConstantArrayType(Ty);
8297       auto *VAT = Context.getAsVariableArrayType(Ty);
8298 
8299       // We need all the dimension size except for the last dimension.
8300       assert((VAT || CAT || &Component == &*Components.begin()) &&
8301              "Should be either ConstantArray or VariableArray if not the "
8302              "first Component");
8303 
8304       // Get element size if CurStrides is empty.
8305       if (CurStrides.empty()) {
8306         const Type *ElementType = nullptr;
8307         if (CAT)
8308           ElementType = CAT->getElementType().getTypePtr();
8309         else if (VAT)
8310           ElementType = VAT->getElementType().getTypePtr();
8311         else
8312           assert(&Component == &*Components.begin() &&
8313                  "Only expect pointer (non CAT or VAT) when this is the "
8314                  "first Component");
8315         // If ElementType is null, then it means the base is a pointer
8316         // (neither CAT nor VAT) and we'll attempt to get ElementType again
8317         // for next iteration.
8318         if (ElementType) {
8319           // For the case that having pointer as base, we need to remove one
8320           // level of indirection.
8321           if (&Component != &*Components.begin())
8322             ElementType = ElementType->getPointeeOrArrayElementType();
8323           ElementTypeSize =
8324               Context.getTypeSizeInChars(ElementType).getQuantity();
8325           CurStrides.push_back(
8326               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8327         }
8328       }
8329       // Get dimension value except for the last dimension since we don't need
8330       // it.
8331       if (DimSizes.size() < Components.size() - 1) {
8332         if (CAT)
8333           DimSizes.push_back(llvm::ConstantInt::get(
8334               CGF.Int64Ty, CAT->getSize().getZExtValue()));
8335         else if (VAT)
8336           DimSizes.push_back(CGF.Builder.CreateIntCast(
8337               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8338               /*IsSigned=*/false));
8339       }
8340     }
8341 
8342     // Skip the dummy dimension since we have already have its information.
8343     auto *DI = DimSizes.begin() + 1;
8344     // Product of dimension.
8345     llvm::Value *DimProd =
8346         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8347 
8348     // Collect info for non-contiguous. Notice that offset, count, and stride
8349     // are only meaningful for array-section, so we insert a null for anything
8350     // other than array-section.
8351     // Also, the size of offset, count, and stride are not the same as
8352     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8353     // count, and stride are the same as the number of non-contiguous
8354     // declaration in target update to/from clause.
8355     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8356          Components) {
8357       const Expr *AssocExpr = Component.getAssociatedExpression();
8358 
8359       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8360         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8361             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8362             /*isSigned=*/false);
8363         CurOffsets.push_back(Offset);
8364         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8365         CurStrides.push_back(CurStrides.back());
8366         continue;
8367       }
8368 
8369       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8370 
8371       if (!OASE)
8372         continue;
8373 
8374       // Offset
8375       const Expr *OffsetExpr = OASE->getLowerBound();
8376       llvm::Value *Offset = nullptr;
8377       if (!OffsetExpr) {
8378         // If offset is absent, then we just set it to zero.
8379         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8380       } else {
8381         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8382                                            CGF.Int64Ty,
8383                                            /*isSigned=*/false);
8384       }
8385       CurOffsets.push_back(Offset);
8386 
8387       // Count
8388       const Expr *CountExpr = OASE->getLength();
8389       llvm::Value *Count = nullptr;
8390       if (!CountExpr) {
8391         // In Clang, once a high dimension is an array section, we construct all
8392         // the lower dimension as array section, however, for case like
8393         // arr[0:2][2], Clang construct the inner dimension as an array section
8394         // but it actually is not in an array section form according to spec.
8395         if (!OASE->getColonLocFirst().isValid() &&
8396             !OASE->getColonLocSecond().isValid()) {
8397           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8398         } else {
8399           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8400           // When the length is absent it defaults to ⌈(size −
8401           // lower-bound)/stride⌉, where size is the size of the array
8402           // dimension.
8403           const Expr *StrideExpr = OASE->getStride();
8404           llvm::Value *Stride =
8405               StrideExpr
8406                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8407                                               CGF.Int64Ty, /*isSigned=*/false)
8408                   : nullptr;
8409           if (Stride)
8410             Count = CGF.Builder.CreateUDiv(
8411                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8412           else
8413             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8414         }
8415       } else {
8416         Count = CGF.EmitScalarExpr(CountExpr);
8417       }
8418       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8419       CurCounts.push_back(Count);
8420 
8421       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8422       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8423       //              Offset      Count     Stride
8424       //    D0          0           1         4    (int)    <- dummy dimension
8425       //    D1          0           2         8    (2 * (1) * 4)
8426       //    D2          1           2         20   (1 * (1 * 5) * 4)
8427       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8428       const Expr *StrideExpr = OASE->getStride();
8429       llvm::Value *Stride =
8430           StrideExpr
8431               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8432                                           CGF.Int64Ty, /*isSigned=*/false)
8433               : nullptr;
8434       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8435       if (Stride)
8436         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8437       else
8438         CurStrides.push_back(DimProd);
8439       if (DI != DimSizes.end())
8440         ++DI;
8441     }
8442 
8443     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8444     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8445     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8446   }
8447 
8448   /// Return the adjusted map modifiers if the declaration a capture refers to
8449   /// appears in a first-private clause. This is expected to be used only with
8450   /// directives that start with 'target'.
8451   MappableExprsHandler::OpenMPOffloadMappingFlags
8452   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8453     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8454 
8455     // A first private variable captured by reference will use only the
8456     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8457     // declaration is known as first-private in this handler.
8458     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8459       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8460         return MappableExprsHandler::OMP_MAP_TO |
8461                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8462       return MappableExprsHandler::OMP_MAP_PRIVATE |
8463              MappableExprsHandler::OMP_MAP_TO;
8464     }
8465     auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8466     if (I != LambdasMap.end())
8467       // for map(to: lambda): using user specified map type.
8468       return getMapTypeBits(
8469           I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8470           /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
8471           /*AddPtrFlag=*/false,
8472           /*AddIsTargetParamFlag=*/false,
8473           /*isNonContiguous=*/false);
8474     return MappableExprsHandler::OMP_MAP_TO |
8475            MappableExprsHandler::OMP_MAP_FROM;
8476   }
8477 
8478   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8479     // Rotate by getFlagMemberOffset() bits.
8480     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8481                                                   << getFlagMemberOffset());
8482   }
8483 
8484   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8485                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8486     // If the entry is PTR_AND_OBJ but has not been marked with the special
8487     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8488     // marked as MEMBER_OF.
8489     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8490         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8491       return;
8492 
8493     // Reset the placeholder value to prepare the flag for the assignment of the
8494     // proper MEMBER_OF value.
8495     Flags &= ~OMP_MAP_MEMBER_OF;
8496     Flags |= MemberOfFlag;
8497   }
8498 
8499   void getPlainLayout(const CXXRecordDecl *RD,
8500                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8501                       bool AsBase) const {
8502     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8503 
8504     llvm::StructType *St =
8505         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8506 
8507     unsigned NumElements = St->getNumElements();
8508     llvm::SmallVector<
8509         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8510         RecordLayout(NumElements);
8511 
8512     // Fill bases.
8513     for (const auto &I : RD->bases()) {
8514       if (I.isVirtual())
8515         continue;
8516       const auto *Base = I.getType()->getAsCXXRecordDecl();
8517       // Ignore empty bases.
8518       if (Base->isEmpty() || CGF.getContext()
8519                                  .getASTRecordLayout(Base)
8520                                  .getNonVirtualSize()
8521                                  .isZero())
8522         continue;
8523 
8524       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8525       RecordLayout[FieldIndex] = Base;
8526     }
8527     // Fill in virtual bases.
8528     for (const auto &I : RD->vbases()) {
8529       const auto *Base = I.getType()->getAsCXXRecordDecl();
8530       // Ignore empty bases.
8531       if (Base->isEmpty())
8532         continue;
8533       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8534       if (RecordLayout[FieldIndex])
8535         continue;
8536       RecordLayout[FieldIndex] = Base;
8537     }
8538     // Fill in all the fields.
8539     assert(!RD->isUnion() && "Unexpected union.");
8540     for (const auto *Field : RD->fields()) {
8541       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8542       // will fill in later.)
8543       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8544         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8545         RecordLayout[FieldIndex] = Field;
8546       }
8547     }
8548     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8549              &Data : RecordLayout) {
8550       if (Data.isNull())
8551         continue;
8552       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8553         getPlainLayout(Base, Layout, /*AsBase=*/true);
8554       else
8555         Layout.push_back(Data.get<const FieldDecl *>());
8556     }
8557   }
8558 
8559   /// Generate all the base pointers, section pointers, sizes, map types, and
8560   /// mappers for the extracted mappable expressions (all included in \a
8561   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8562   /// pair of the relevant declaration and index where it occurs is appended to
8563   /// the device pointers info array.
8564   void generateAllInfoForClauses(
8565       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8566       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8567           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8568     // We have to process the component lists that relate with the same
8569     // declaration in a single chunk so that we can generate the map flags
8570     // correctly. Therefore, we organize all lists in a map.
8571     enum MapKind { Present, Allocs, Other, Total };
8572     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8573                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8574         Info;
8575 
8576     // Helper function to fill the information map for the different supported
8577     // clauses.
8578     auto &&InfoGen =
8579         [&Info, &SkipVarSet](
8580             const ValueDecl *D, MapKind Kind,
8581             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8582             OpenMPMapClauseKind MapType,
8583             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8584             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8585             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8586             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8587           if (SkipVarSet.contains(D))
8588             return;
8589           auto It = Info.find(D);
8590           if (It == Info.end())
8591             It = Info
8592                      .insert(std::make_pair(
8593                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8594                      .first;
8595           It->second[Kind].emplace_back(
8596               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8597               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8598         };
8599 
8600     for (const auto *Cl : Clauses) {
8601       const auto *C = dyn_cast<OMPMapClause>(Cl);
8602       if (!C)
8603         continue;
8604       MapKind Kind = Other;
8605       if (llvm::is_contained(C->getMapTypeModifiers(),
8606                              OMPC_MAP_MODIFIER_present))
8607         Kind = Present;
8608       else if (C->getMapType() == OMPC_MAP_alloc)
8609         Kind = Allocs;
8610       const auto *EI = C->getVarRefs().begin();
8611       for (const auto L : C->component_lists()) {
8612         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8613         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8614                 C->getMapTypeModifiers(), llvm::None,
8615                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8616                 E);
8617         ++EI;
8618       }
8619     }
8620     for (const auto *Cl : Clauses) {
8621       const auto *C = dyn_cast<OMPToClause>(Cl);
8622       if (!C)
8623         continue;
8624       MapKind Kind = Other;
8625       if (llvm::is_contained(C->getMotionModifiers(),
8626                              OMPC_MOTION_MODIFIER_present))
8627         Kind = Present;
8628       const auto *EI = C->getVarRefs().begin();
8629       for (const auto L : C->component_lists()) {
8630         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8631                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8632                 C->isImplicit(), std::get<2>(L), *EI);
8633         ++EI;
8634       }
8635     }
8636     for (const auto *Cl : Clauses) {
8637       const auto *C = dyn_cast<OMPFromClause>(Cl);
8638       if (!C)
8639         continue;
8640       MapKind Kind = Other;
8641       if (llvm::is_contained(C->getMotionModifiers(),
8642                              OMPC_MOTION_MODIFIER_present))
8643         Kind = Present;
8644       const auto *EI = C->getVarRefs().begin();
8645       for (const auto L : C->component_lists()) {
8646         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8647                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8648                 C->isImplicit(), std::get<2>(L), *EI);
8649         ++EI;
8650       }
8651     }
8652 
8653     // Look at the use_device_ptr clause information and mark the existing map
8654     // entries as such. If there is no map information for an entry in the
8655     // use_device_ptr list, we create one with map type 'alloc' and zero size
8656     // section. It is the user fault if that was not mapped before. If there is
8657     // no map information and the pointer is a struct member, then we defer the
8658     // emission of that entry until the whole struct has been processed.
8659     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8660                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8661         DeferredInfo;
8662     MapCombinedInfoTy UseDevicePtrCombinedInfo;
8663 
8664     for (const auto *Cl : Clauses) {
8665       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8666       if (!C)
8667         continue;
8668       for (const auto L : C->component_lists()) {
8669         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8670             std::get<1>(L);
8671         assert(!Components.empty() &&
8672                "Not expecting empty list of components!");
8673         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8674         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8675         const Expr *IE = Components.back().getAssociatedExpression();
8676         // If the first component is a member expression, we have to look into
8677         // 'this', which maps to null in the map of map information. Otherwise
8678         // look directly for the information.
8679         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8680 
8681         // We potentially have map information for this declaration already.
8682         // Look for the first set of components that refer to it.
8683         if (It != Info.end()) {
8684           bool Found = false;
8685           for (auto &Data : It->second) {
8686             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8687               return MI.Components.back().getAssociatedDeclaration() == VD;
8688             });
8689             // If we found a map entry, signal that the pointer has to be
8690             // returned and move on to the next declaration. Exclude cases where
8691             // the base pointer is mapped as array subscript, array section or
8692             // array shaping. The base address is passed as a pointer to base in
8693             // this case and cannot be used as a base for use_device_ptr list
8694             // item.
8695             if (CI != Data.end()) {
8696               auto PrevCI = std::next(CI->Components.rbegin());
8697               const auto *VarD = dyn_cast<VarDecl>(VD);
8698               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8699                   isa<MemberExpr>(IE) ||
8700                   !VD->getType().getNonReferenceType()->isPointerType() ||
8701                   PrevCI == CI->Components.rend() ||
8702                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8703                   VarD->hasLocalStorage()) {
8704                 CI->ReturnDevicePointer = true;
8705                 Found = true;
8706                 break;
8707               }
8708             }
8709           }
8710           if (Found)
8711             continue;
8712         }
8713 
8714         // We didn't find any match in our map information - generate a zero
8715         // size array section - if the pointer is a struct member we defer this
8716         // action until the whole struct has been processed.
8717         if (isa<MemberExpr>(IE)) {
8718           // Insert the pointer into Info to be processed by
8719           // generateInfoForComponentList. Because it is a member pointer
8720           // without a pointee, no entry will be generated for it, therefore
8721           // we need to generate one after the whole struct has been processed.
8722           // Nonetheless, generateInfoForComponentList must be called to take
8723           // the pointer into account for the calculation of the range of the
8724           // partial struct.
8725           InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8726                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8727                   nullptr);
8728           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8729         } else {
8730           llvm::Value *Ptr =
8731               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8732           UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8733           UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8734           UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8735           UseDevicePtrCombinedInfo.Sizes.push_back(
8736               llvm::Constant::getNullValue(CGF.Int64Ty));
8737           UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8738           UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8739         }
8740       }
8741     }
8742 
8743     // Look at the use_device_addr clause information and mark the existing map
8744     // entries as such. If there is no map information for an entry in the
8745     // use_device_addr list, we create one with map type 'alloc' and zero size
8746     // section. It is the user fault if that was not mapped before. If there is
8747     // no map information and the pointer is a struct member, then we defer the
8748     // emission of that entry until the whole struct has been processed.
8749     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8750     for (const auto *Cl : Clauses) {
8751       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8752       if (!C)
8753         continue;
8754       for (const auto L : C->component_lists()) {
8755         assert(!std::get<1>(L).empty() &&
8756                "Not expecting empty list of components!");
8757         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8758         if (!Processed.insert(VD).second)
8759           continue;
8760         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8761         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8762         // If the first component is a member expression, we have to look into
8763         // 'this', which maps to null in the map of map information. Otherwise
8764         // look directly for the information.
8765         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8766 
8767         // We potentially have map information for this declaration already.
8768         // Look for the first set of components that refer to it.
8769         if (It != Info.end()) {
8770           bool Found = false;
8771           for (auto &Data : It->second) {
8772             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8773               return MI.Components.back().getAssociatedDeclaration() == VD;
8774             });
8775             // If we found a map entry, signal that the pointer has to be
8776             // returned and move on to the next declaration.
8777             if (CI != Data.end()) {
8778               CI->ReturnDevicePointer = true;
8779               Found = true;
8780               break;
8781             }
8782           }
8783           if (Found)
8784             continue;
8785         }
8786 
8787         // We didn't find any match in our map information - generate a zero
8788         // size array section - if the pointer is a struct member we defer this
8789         // action until the whole struct has been processed.
8790         if (isa<MemberExpr>(IE)) {
8791           // Insert the pointer into Info to be processed by
8792           // generateInfoForComponentList. Because it is a member pointer
8793           // without a pointee, no entry will be generated for it, therefore
8794           // we need to generate one after the whole struct has been processed.
8795           // Nonetheless, generateInfoForComponentList must be called to take
8796           // the pointer into account for the calculation of the range of the
8797           // partial struct.
8798           InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8799                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8800                   nullptr, nullptr, /*ForDeviceAddr=*/true);
8801           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8802         } else {
8803           llvm::Value *Ptr;
8804           if (IE->isGLValue())
8805             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8806           else
8807             Ptr = CGF.EmitScalarExpr(IE);
8808           CombinedInfo.Exprs.push_back(VD);
8809           CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8810           CombinedInfo.Pointers.push_back(Ptr);
8811           CombinedInfo.Sizes.push_back(
8812               llvm::Constant::getNullValue(CGF.Int64Ty));
8813           CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8814           CombinedInfo.Mappers.push_back(nullptr);
8815         }
8816       }
8817     }
8818 
8819     for (const auto &Data : Info) {
8820       StructRangeInfoTy PartialStruct;
8821       // Temporary generated information.
8822       MapCombinedInfoTy CurInfo;
8823       const Decl *D = Data.first;
8824       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8825       for (const auto &M : Data.second) {
8826         for (const MapInfo &L : M) {
8827           assert(!L.Components.empty() &&
8828                  "Not expecting declaration with no component lists.");
8829 
8830           // Remember the current base pointer index.
8831           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8832           CurInfo.NonContigInfo.IsNonContiguous =
8833               L.Components.back().isNonContiguous();
8834           generateInfoForComponentList(
8835               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8836               CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8837               L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8838 
8839           // If this entry relates with a device pointer, set the relevant
8840           // declaration and add the 'return pointer' flag.
8841           if (L.ReturnDevicePointer) {
8842             assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8843                    "Unexpected number of mapped base pointers.");
8844 
8845             const ValueDecl *RelevantVD =
8846                 L.Components.back().getAssociatedDeclaration();
8847             assert(RelevantVD &&
8848                    "No relevant declaration related with device pointer??");
8849 
8850             CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8851                 RelevantVD);
8852             CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8853           }
8854         }
8855       }
8856 
8857       // Append any pending zero-length pointers which are struct members and
8858       // used with use_device_ptr or use_device_addr.
8859       auto CI = DeferredInfo.find(Data.first);
8860       if (CI != DeferredInfo.end()) {
8861         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8862           llvm::Value *BasePtr;
8863           llvm::Value *Ptr;
8864           if (L.ForDeviceAddr) {
8865             if (L.IE->isGLValue())
8866               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8867             else
8868               Ptr = this->CGF.EmitScalarExpr(L.IE);
8869             BasePtr = Ptr;
8870             // Entry is RETURN_PARAM. Also, set the placeholder value
8871             // MEMBER_OF=FFFF so that the entry is later updated with the
8872             // correct value of MEMBER_OF.
8873             CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8874           } else {
8875             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8876             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8877                                              L.IE->getExprLoc());
8878             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8879             // placeholder value MEMBER_OF=FFFF so that the entry is later
8880             // updated with the correct value of MEMBER_OF.
8881             CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8882                                     OMP_MAP_MEMBER_OF);
8883           }
8884           CurInfo.Exprs.push_back(L.VD);
8885           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8886           CurInfo.Pointers.push_back(Ptr);
8887           CurInfo.Sizes.push_back(
8888               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8889           CurInfo.Mappers.push_back(nullptr);
8890         }
8891       }
8892       // If there is an entry in PartialStruct it means we have a struct with
8893       // individual members mapped. Emit an extra combined entry.
8894       if (PartialStruct.Base.isValid()) {
8895         CurInfo.NonContigInfo.Dims.push_back(0);
8896         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8897       }
8898 
8899       // We need to append the results of this capture to what we already
8900       // have.
8901       CombinedInfo.append(CurInfo);
8902     }
8903     // Append data for use_device_ptr clauses.
8904     CombinedInfo.append(UseDevicePtrCombinedInfo);
8905   }
8906 
8907 public:
8908   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8909       : CurDir(&Dir), CGF(CGF) {
8910     // Extract firstprivate clause information.
8911     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8912       for (const auto *D : C->varlists())
8913         FirstPrivateDecls.try_emplace(
8914             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8915     // Extract implicit firstprivates from uses_allocators clauses.
8916     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8917       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8918         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8919         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8920           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8921                                         /*Implicit=*/true);
8922         else if (const auto *VD = dyn_cast<VarDecl>(
8923                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8924                          ->getDecl()))
8925           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8926       }
8927     }
8928     // Extract device pointer clause information.
8929     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8930       for (auto L : C->component_lists())
8931         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8932     // Extract map information.
8933     for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8934       if (C->getMapType() != OMPC_MAP_to)
8935         continue;
8936       for (auto L : C->component_lists()) {
8937         const ValueDecl *VD = std::get<0>(L);
8938         const auto *RD = VD ? VD->getType()
8939                                   .getCanonicalType()
8940                                   .getNonReferenceType()
8941                                   ->getAsCXXRecordDecl()
8942                             : nullptr;
8943         if (RD && RD->isLambda())
8944           LambdasMap.try_emplace(std::get<0>(L), C);
8945       }
8946     }
8947   }
8948 
8949   /// Constructor for the declare mapper directive.
  /// \param Dir Declare mapper directive whose clauses will be processed.
  /// \param CGF CodeGen function used to emit any required IR.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8952 
8953   /// Generate code for the combined entry if we have a partially mapped struct
8954   /// and take care of the mapping flags of the arguments corresponding to
8955   /// individual struct members.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    // Nothing to combine: a single entry that is neither a struct member nor
    // an array section is emitted as-is.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    // If the complete record is mapped, collapse both bounds onto the record
    // base; the "+1" GEP below then covers the full record.
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr =
        CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element: the combined entry
    // emitted above is the kernel argument now.
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement.  Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_OMPX_HOLD;
        })) {
      CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }
9024 
9025   /// Generate all the base pointers, section pointers, sizes, map types, and
9026   /// mappers for the extracted mappable expressions (all included in \a
9027   /// CombinedInfo). Also, for each item that relates with a device pointer, a
9028   /// pair of the relevant declaration and index where it occurs is appended to
9029   /// the device pointers info array.
9030   void generateAllInfo(
9031       MapCombinedInfoTy &CombinedInfo,
9032       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9033           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9034     assert(CurDir.is<const OMPExecutableDirective *>() &&
9035            "Expect a executable directive");
9036     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9037     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
9038   }
9039 
9040   /// Generate all the base pointers, section pointers, sizes, map types, and
9041   /// mappers for the extracted map clauses of user-defined mapper (all included
9042   /// in \a CombinedInfo).
9043   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
9044     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
9045            "Expect a declare mapper directive");
9046     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
9047     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
9048   }
9049 
9050   /// Emit capture info for lambdas for variables captured by reference.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    // Only lambda objects need this treatment; bail out for anything else.
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr =
        Address::deprecated(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    // Emit a PTR_AND_OBJ entry for a captured 'this' field so the device copy
    // of the lambda refers to the mapped enclosing object.
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      // Record which lambda object this entry belongs to so the MEMBER_OF
      // index can be fixed up later (adjustMemberOfForLambdaCaptures).
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    // Emit one entry per capture that is by-reference or of pointer type;
    // by-value non-pointer captures travel with the lambda object itself.
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = LC.getCapturedVar();
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: map the referenced storage with its full
        // (non-reference) type size.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // By-value pointer capture: pass the pointer value itself; size zero
        // means no pointee storage is mapped here.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }
9117 
9118   /// Set correct indices for lambdas captures.
9119   void adjustMemberOfForLambdaCaptures(
9120       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9121       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9122       MapFlagsArrayTy &Types) const {
9123     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9124       // Set correct member_of idx for all implicit lambda captures.
9125       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9126                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9127         continue;
9128       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9129       assert(BasePtr && "Unable to find base lambda address.");
9130       int TgtIdx = -1;
9131       for (unsigned J = I; J > 0; --J) {
9132         unsigned Idx = J - 1;
9133         if (Pointers[Idx] != BasePtr)
9134           continue;
9135         TgtIdx = Idx;
9136         break;
9137       }
9138       assert(TgtIdx != -1 && "Unable to find parent lambda.");
9139       // All other current entries will be MEMBER_OF the combined entry
9140       // (except for PTR_AND_OBJ entries which do not have a placeholder value
9141       // 0xFFFF in the MEMBER_OF field).
9142       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9143       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9144     }
9145   }
9146 
9147   /// Generate the base pointers, section pointers, sizes, map types, and
9148   /// mappers associated to a given capture (all included in \a CombinedInfo).
9149   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
9150                               llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9151                               StructRangeInfoTy &PartialStruct) const {
9152     assert(!Cap->capturesVariableArrayType() &&
9153            "Not expecting to generate map info for a variable array type!");
9154 
9155     // We need to know when we generating information for the first component
9156     const ValueDecl *VD = Cap->capturesThis()
9157                               ? nullptr
9158                               : Cap->getCapturedVar()->getCanonicalDecl();
9159 
9160     // for map(to: lambda): skip here, processing it in
9161     // generateDefaultMapInfo
9162     if (LambdasMap.count(VD))
9163       return;
9164 
9165     // If this declaration appears in a is_device_ptr clause we just have to
9166     // pass the pointer by value. If it is a reference to a declaration, we just
9167     // pass its value.
9168     if (DevPointersMap.count(VD)) {
9169       CombinedInfo.Exprs.push_back(VD);
9170       CombinedInfo.BasePointers.emplace_back(Arg, VD);
9171       CombinedInfo.Pointers.push_back(Arg);
9172       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9173           CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9174           /*isSigned=*/true));
9175       CombinedInfo.Types.push_back(
9176           (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
9177           OMP_MAP_TARGET_PARAM);
9178       CombinedInfo.Mappers.push_back(nullptr);
9179       return;
9180     }
9181 
9182     using MapData =
9183         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
9184                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
9185                    const ValueDecl *, const Expr *>;
9186     SmallVector<MapData, 4> DeclComponentLists;
9187     assert(CurDir.is<const OMPExecutableDirective *>() &&
9188            "Expect a executable directive");
9189     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9190     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9191       const auto *EI = C->getVarRefs().begin();
9192       for (const auto L : C->decl_component_lists(VD)) {
9193         const ValueDecl *VDecl, *Mapper;
9194         // The Expression is not correct if the mapping is implicit
9195         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9196         OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9197         std::tie(VDecl, Components, Mapper) = L;
9198         assert(VDecl == VD && "We got information for the wrong declaration??");
9199         assert(!Components.empty() &&
9200                "Not expecting declaration with no component lists.");
9201         DeclComponentLists.emplace_back(Components, C->getMapType(),
9202                                         C->getMapTypeModifiers(),
9203                                         C->isImplicit(), Mapper, E);
9204         ++EI;
9205       }
9206     }
9207     llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9208                                              const MapData &RHS) {
9209       ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9210       OpenMPMapClauseKind MapType = std::get<1>(RHS);
9211       bool HasPresent =
9212           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9213       bool HasAllocs = MapType == OMPC_MAP_alloc;
9214       MapModifiers = std::get<2>(RHS);
9215       MapType = std::get<1>(LHS);
9216       bool HasPresentR =
9217           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9218       bool HasAllocsR = MapType == OMPC_MAP_alloc;
9219       return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9220     });
9221 
9222     // Find overlapping elements (including the offset from the base element).
9223     llvm::SmallDenseMap<
9224         const MapData *,
9225         llvm::SmallVector<
9226             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9227         4>
9228         OverlappedData;
9229     size_t Count = 0;
9230     for (const MapData &L : DeclComponentLists) {
9231       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9232       OpenMPMapClauseKind MapType;
9233       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9234       bool IsImplicit;
9235       const ValueDecl *Mapper;
9236       const Expr *VarRef;
9237       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9238           L;
9239       ++Count;
9240       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
9241         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9242         std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9243                  VarRef) = L1;
9244         auto CI = Components.rbegin();
9245         auto CE = Components.rend();
9246         auto SI = Components1.rbegin();
9247         auto SE = Components1.rend();
9248         for (; CI != CE && SI != SE; ++CI, ++SI) {
9249           if (CI->getAssociatedExpression()->getStmtClass() !=
9250               SI->getAssociatedExpression()->getStmtClass())
9251             break;
9252           // Are we dealing with different variables/fields?
9253           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9254             break;
9255         }
9256         // Found overlapping if, at least for one component, reached the head
9257         // of the components list.
9258         if (CI == CE || SI == SE) {
9259           // Ignore it if it is the same component.
9260           if (CI == CE && SI == SE)
9261             continue;
9262           const auto It = (SI == SE) ? CI : SI;
9263           // If one component is a pointer and another one is a kind of
9264           // dereference of this pointer (array subscript, section, dereference,
9265           // etc.), it is not an overlapping.
9266           // Same, if one component is a base and another component is a
9267           // dereferenced pointer memberexpr with the same base.
9268           if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9269               (std::prev(It)->getAssociatedDeclaration() &&
9270                std::prev(It)
9271                    ->getAssociatedDeclaration()
9272                    ->getType()
9273                    ->isPointerType()) ||
9274               (It->getAssociatedDeclaration() &&
9275                It->getAssociatedDeclaration()->getType()->isPointerType() &&
9276                std::next(It) != CE && std::next(It) != SE))
9277             continue;
9278           const MapData &BaseData = CI == CE ? L : L1;
9279           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9280               SI == SE ? Components : Components1;
9281           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
9282           OverlappedElements.getSecond().push_back(SubData);
9283         }
9284       }
9285     }
9286     // Sort the overlapped elements for each item.
9287     llvm::SmallVector<const FieldDecl *, 4> Layout;
9288     if (!OverlappedData.empty()) {
9289       const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9290       const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9291       while (BaseType != OrigType) {
9292         BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9293         OrigType = BaseType->getPointeeOrArrayElementType();
9294       }
9295 
9296       if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9297         getPlainLayout(CRD, Layout, /*AsBase=*/false);
9298       else {
9299         const auto *RD = BaseType->getAsRecordDecl();
9300         Layout.append(RD->field_begin(), RD->field_end());
9301       }
9302     }
9303     for (auto &Pair : OverlappedData) {
9304       llvm::stable_sort(
9305           Pair.getSecond(),
9306           [&Layout](
9307               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9308               OMPClauseMappableExprCommon::MappableExprComponentListRef
9309                   Second) {
9310             auto CI = First.rbegin();
9311             auto CE = First.rend();
9312             auto SI = Second.rbegin();
9313             auto SE = Second.rend();
9314             for (; CI != CE && SI != SE; ++CI, ++SI) {
9315               if (CI->getAssociatedExpression()->getStmtClass() !=
9316                   SI->getAssociatedExpression()->getStmtClass())
9317                 break;
9318               // Are we dealing with different variables/fields?
9319               if (CI->getAssociatedDeclaration() !=
9320                   SI->getAssociatedDeclaration())
9321                 break;
9322             }
9323 
9324             // Lists contain the same elements.
9325             if (CI == CE && SI == SE)
9326               return false;
9327 
9328             // List with less elements is less than list with more elements.
9329             if (CI == CE || SI == SE)
9330               return CI == CE;
9331 
9332             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9333             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9334             if (FD1->getParent() == FD2->getParent())
9335               return FD1->getFieldIndex() < FD2->getFieldIndex();
9336             const auto *It =
9337                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9338                   return FD == FD1 || FD == FD2;
9339                 });
9340             return *It == FD1;
9341           });
9342     }
9343 
9344     // Associated with a capture, because the mapping flags depend on it.
9345     // Go through all of the elements with the overlapped elements.
9346     bool IsFirstComponentList = true;
9347     for (const auto &Pair : OverlappedData) {
9348       const MapData &L = *Pair.getFirst();
9349       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9350       OpenMPMapClauseKind MapType;
9351       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9352       bool IsImplicit;
9353       const ValueDecl *Mapper;
9354       const Expr *VarRef;
9355       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9356           L;
9357       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9358           OverlappedComponents = Pair.getSecond();
9359       generateInfoForComponentList(
9360           MapType, MapModifiers, llvm::None, Components, CombinedInfo,
9361           PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
9362           /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
9363       IsFirstComponentList = false;
9364     }
9365     // Go through other elements without overlapped elements.
9366     for (const MapData &L : DeclComponentLists) {
9367       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9368       OpenMPMapClauseKind MapType;
9369       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9370       bool IsImplicit;
9371       const ValueDecl *Mapper;
9372       const Expr *VarRef;
9373       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9374           L;
9375       auto It = OverlappedData.find(&L);
9376       if (It == OverlappedData.end())
9377         generateInfoForComponentList(MapType, MapModifiers, llvm::None,
9378                                      Components, CombinedInfo, PartialStruct,
9379                                      IsFirstComponentList, IsImplicit, Mapper,
9380                                      /*ForDeviceAddr=*/false, VD, VarRef);
9381       IsFirstComponentList = false;
9382     }
9383   }
9384 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Appends exactly one entry (expr, base pointer, pointer, size, map type,
  /// mapper) to \a CombinedInfo, chosen by how the value was captured:
  /// 'this', by copy, or by reference.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // Capture of 'this': no associated expression, map the pointee object.
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      // The mapped size is the size of the object 'this' points to, widened
      // to 64 bits for the runtime interface.
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // A firstprivate capture records whether the map is implicit.
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        // Firstprivate pointer captured by reference: pass the loaded pointer
        // value itself, not the address of the reference.
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
9457 };
9458 } // anonymous namespace
9459 
/// Emit the per-dimension descriptors used by the runtime for non-contiguous
/// target mappings.
///
/// For each mapped item whose dimension count is greater than one, a stack
/// array of 'struct descriptor_dim' entries (offset/count/stride, one per
/// dimension) is built and its address is stored into the corresponding slot
/// of \p Info.PointersArray.
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //  uint64_t offset;
  //  uint64_t count;
  //  uint64_t stride
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  // Field indices within descriptor_dim.
  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variable here since the size of "Dims" is the same as the
  // size of Components, however, the size of offset, count, and stride is equal
  // to the size of base declaration that is non-contiguous.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting ir if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      // Offsets/Counts/Strides are indexed in reverse relative to the emitted
      // descriptor slots.
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy, CGM.Int8Ty);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr = Address::deprecated(P, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    ++L;
  }
}
9527 
9528 // Try to extract the base declaration from a `this->x` expression if possible.
9529 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9530   if (!E)
9531     return nullptr;
9532 
9533   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
9534     if (const MemberExpr *ME =
9535             dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9536       return ME->getMemberDecl();
9537   return nullptr;
9538 }
9539 
9540 /// Emit a string constant containing the names of the values mapped to the
9541 /// offloading runtime library.
9542 llvm::Constant *
9543 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9544                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9545 
9546   uint32_t SrcLocStrSize;
9547   if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9548     return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
9549 
9550   SourceLocation Loc;
9551   if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9552     if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9553       Loc = VD->getLocation();
9554     else
9555       Loc = MapExprs.getMapExpr()->getExprLoc();
9556   } else {
9557     Loc = MapExprs.getMapDecl()->getLocation();
9558   }
9559 
9560   std::string ExprName;
9561   if (MapExprs.getMapExpr()) {
9562     PrintingPolicy P(CGF.getContext().getLangOpts());
9563     llvm::raw_string_ostream OS(ExprName);
9564     MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9565     OS.flush();
9566   } else {
9567     ExprName = MapExprs.getMapDecl()->getNameAsString();
9568   }
9569 
9570   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9571   return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
9572                                          PLoc.getLine(), PLoc.getColumn(),
9573                                          SrcLocStrSize);
9574 }
9575 
9576 /// Emit the arrays used to pass the captures and map information to the
9577 /// offloading runtime library. If there is no map or capture information,
9578 /// return nullptr by reference.
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// Fills in \p Info with the base-pointer, pointer, size, map-type, map-name
/// and mapper arrays derived from \p CombinedInfo. Sizes known at compile
/// time become a constant global; runtime-evaluated sizes are stored into a
/// stack buffer. When \p IsNonContiguous is set, dimension descriptors are
/// additionally emitted at the end.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    // ConstSizes holds the compile-time-constant sizes (zero placeholder for
    // runtime ones); RuntimeSizes marks which entries must be stored at
    // runtime instead.
    SmallVector<llvm::Constant *> ConstSizes(
        CombinedInfo.Sizes.size(), llvm::ConstantInt::get(CGF.Int64Ty, 0));
    llvm::SmallBitVector RuntimeSizes(CombinedInfo.Sizes.size());
    for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
      if (auto *CI = dyn_cast<llvm::Constant>(CombinedInfo.Sizes[I])) {
        if (!isa<llvm::ConstantExpr>(CI) && !isa<llvm::GlobalValue>(CI)) {
          // Non-contiguous entries carry the dimension count instead of the
          // byte size.
          if (IsNonContiguous && (CombinedInfo.Types[I] &
                                  MappableExprsHandler::OMP_MAP_NON_CONTIG))
            ConstSizes[I] = llvm::ConstantInt::get(
                CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]);
          else
            ConstSizes[I] = CI;
          continue;
        }
      }
      RuntimeSizes.set(I);
    }

    if (RuntimeSizes.all()) {
      // Every size is runtime-evaluated: a plain stack array is filled below.
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // At least some sizes are constant: emit them as a private global.
      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage, SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      if (RuntimeSizes.any()) {
        // Mixed constant/runtime sizes: copy the constants into a stack
        // buffer so the runtime entries can be stored over it below.
        QualType SizeArrayType = Ctx.getConstantArrayType(
            Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
            /*IndexTypeQuals=*/0);
        Address Buffer = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes");
        llvm::Value *GblConstPtr =
            CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                SizesArrayGbl, CGM.Int64Ty->getPointerTo());
        CGF.Builder.CreateMemCpy(
            Buffer,
            Address(GblConstPtr, CGM.Int64Ty,
                    CGM.getNaturalTypeAlignment(Ctx.getIntTypeForBitwidth(
                        /*DestWidth=*/64, /*Signed=*/false))),
            CGF.getTypeSize(SizeArrayType));
        Info.SizesArray = Buffer.getPointer();
      } else {
        // All sizes constant: the global itself is the sizes array.
        Info.SizesArray = SizesArrayGbl;
      }
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Fill the base-pointer, pointer, runtime-size and mapper slots for every
    // mapped entry.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr =
          Address::deprecated(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record where device-pointer captures live so later codegen can find
      // their addresses.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr =
          Address::deprecated(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Only runtime-evaluated sizes need a store; constants are already in
      // the sizes array/global.
      if (RuntimeSizes.test(I)) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr =
            Address::deprecated(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  // Dimension descriptors are only needed for non-contiguous mappings.
  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}
9768 
9769 namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  /// True when the arguments are emitted for the end of a target region.
  bool ForEndCall;

  /// By default, arguments are emitted for the beginning of a region.
  ArgumentsOptions() : ForEndCall(false) {}
  ArgumentsOptions(bool IsForEndCall) : ForEndCall(IsForEndCall) {}
};
9776 } // namespace
9777 
/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers.  If
/// ForEndCall, emit map types to be passed for the end of the region instead of
/// the beginning.
///
/// When \p Info has no pointers, every out-argument is set to a null pointer
/// of the appropriate type; otherwise each is a decayed pointer to the first
/// element of the corresponding array emitted by emitOffloadingArrays.
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
    llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
    const ArgumentsOptions &Options = ArgumentsOptions()) {
  assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
         "expected region end call to runtime only when end call is separate");
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    // &array[0] for each of the emitted offload arrays.
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    // For the end of a region, use the separate end map-type array when one
    // was generated (present modifier stripped), otherwise the begin array.
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
                                                    : Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);

    // Only emit the mapper information arrays if debug information is
    // requested.
    if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
      MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.MapNamesArray,
          /*Idx0=*/0,
          /*Idx1=*/0);
    // If there is no user-defined mapper, set the mapper array to nullptr to
    // avoid an unnecessary data privatization
    if (!Info.HasMapper)
      MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MappersArrayArg =
          CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
  } else {
    // Nothing mapped: pass typed null pointers for every argument.
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
  }
}
9838 
/// Check for inner distribute directive.
///
/// Looks through the captured body of \p D for a nested 'distribute'-class
/// directive, descending one extra level through a nested 'teams' directive
/// for plain 'target'. Returns the nested directive, or nullptr if none is
/// found for the directive kinds that allow one.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // 'target' may contain 'distribute' directly, or via a nested 'teams'.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      // 'target teams' may contain 'distribute' directly.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      // These target forms cannot contain a nested 'distribute'.
      return nullptr;
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      // Only target-class directives are expected here.
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
9948 
9949 /// Emit the user-defined mapper function. The code generation follows the
9950 /// pattern in the example below.
9951 /// \code
9952 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9953 ///                                           void *base, void *begin,
9954 ///                                           int64_t size, int64_t type,
9955 ///                                           void *name = nullptr) {
9956 ///   // Allocate space for an array section first or add a base/begin for
9957 ///   // pointer dereference.
9958 ///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9959 ///       !maptype.IsDelete)
9960 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9961 ///                                 size*sizeof(Ty), clearToFromMember(type));
9962 ///   // Map members.
9963 ///   for (unsigned i = 0; i < size; i++) {
9964 ///     // For each component specified by this mapper:
9965 ///     for (auto c : begin[i]->all_components) {
9966 ///       if (c.hasMapper())
9967 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9968 ///                       c.arg_type, c.arg_name);
9969 ///       else
9970 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9971 ///                                     c.arg_begin, c.arg_size, c.arg_type,
9972 ///                                     c.arg_name);
9973 ///     }
9974 ///   }
9975 ///   // Delete the array section.
9976 ///   if (size > 1 && maptype.IsDelete)
9977 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9978 ///                                 size*sizeof(Ty), clearToFromMember(type));
9979 /// }
9980 /// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Each mapper declaration is emitted at most once; UDMMap caches the
  // generated function.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  // The array section is traversed through a restrict-qualified pointer to
  // the mapped type.
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  // The variable declared in the 'declare mapper' directive; it is privatized
  // below so that the map clauses of the mapper see the current element.
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes. The signature matches
  // the one described in the \code example above: (handle, base, begin, size,
  // type, name).
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The mapper function name embeds the mangled type name and the mapper id,
  // e.g. ".omp_mapper.<type_name>.<mapper_id>.".
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Always allow the mapper to be optimized, even at -O0.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  // One-past-the-end pointer; the loop below walks [PtrBegin, PtrEnd).
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(
      PtrBegin->getType()->getPointerElementType(), PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  // Current-element pointer; the second incoming value (the incremented
  // pointer) is added at the bottom of the loop.
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address::deprecated(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                                      .getAlignment()
                                      .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, PtrCurrent);
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the pre-existing component count into the MEMBER_OF bit-field
  // position so it can be added onto each member's map type below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // Mapping names are only emitted when debug info is requested.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    // The loop back-edge for PtrPHI must come from the last emitted block.
    LastBB = EndBB;
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    // The edge from ToElseBB is the tofrom case: map type left unchanged.
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the emitted function and, when called from within a function,
  // remember which mapper declarations that function uses.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
10229 
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section. Only sections with more than one
  // element take the init/delete path.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    // Init also fires for a pointer dereference (base != begin && PtrAndObj),
    // and only when the delete bit is NOT set.
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    // Deletion fires only for an array section when the delete bit IS set.
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  // Mark the whole-section entry as implicit since the user did not write it.
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
10297 
10298 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10299     const OMPDeclareMapperDecl *D) {
10300   auto I = UDMMap.find(D);
10301   if (I != UDMMap.end())
10302     return I->second;
10303   emitUserDefinedMapper(D);
10304   return UDMMap.lookup(D);
10305 }
10306 
10307 void CGOpenMPRuntime::emitTargetNumIterationsCall(
10308     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10309     llvm::Value *DeviceID,
10310     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10311                                      const OMPLoopDirective &D)>
10312         SizeEmitter) {
10313   OpenMPDirectiveKind Kind = D.getDirectiveKind();
10314   const OMPExecutableDirective *TD = &D;
10315   // Get nested teams distribute kind directive, if any.
10316   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
10317     TD = getNestedDistributeDirective(CGM.getContext(), D);
10318   if (!TD)
10319     return;
10320   const auto *LD = cast<OMPLoopDirective>(TD);
10321   auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
10322                                                          PrePostActionTy &) {
10323     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
10324       llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10325       llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
10326       CGF.EmitRuntimeCall(
10327           OMPBuilder.getOrCreateRuntimeFunction(
10328               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
10329           Args);
10330     }
10331   };
10332   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
10333 }
10334 
10335 void CGOpenMPRuntime::emitTargetCall(
10336     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10337     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
10338     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10339     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10340                                      const OMPLoopDirective &D)>
10341         SizeEmitter) {
10342   if (!CGF.HaveInsertPoint())
10343     return;
10344 
10345   const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsDevice &&
10346                                    CGM.getLangOpts().OpenMPOffloadMandatory;
10347 
10348   assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
10349 
10350   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10351                                  D.hasClausesOfKind<OMPNowaitClause>();
10352   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
10353   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
10354   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
10355                                             PrePostActionTy &) {
10356     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10357   };
10358   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
10359 
10360   CodeGenFunction::OMPTargetDataInfo InputInfo;
10361   llvm::Value *MapTypesArray = nullptr;
10362   llvm::Value *MapNamesArray = nullptr;
10363   // Generate code for the host fallback function.
10364   auto &&FallbackGen = [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask,
10365                         &CS, OffloadingMandatory](CodeGenFunction &CGF) {
10366     if (OffloadingMandatory) {
10367       CGF.Builder.CreateUnreachable();
10368     } else {
10369       if (RequiresOuterTask) {
10370         CapturedVars.clear();
10371         CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10372       }
10373       emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10374     }
10375   };
10376   // Fill up the pointer arrays and transfer execution to the device.
10377   auto &&ThenGen = [this, Device, OutlinedFnID, &D, &InputInfo, &MapTypesArray,
10378                     &MapNamesArray, SizeEmitter,
10379                     FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
10380     if (Device.getInt() == OMPC_DEVICE_ancestor) {
10381       // Reverse offloading is not supported, so just execute on the host.
10382       FallbackGen(CGF);
10383       return;
10384     }
10385 
10386     // On top of the arrays that were filled up, the target offloading call
10387     // takes as arguments the device id as well as the host pointer. The host
10388     // pointer is used by the runtime library to identify the current target
10389     // region, so it only has to be unique and not necessarily point to
10390     // anything. It could be the pointer to the outlined function that
10391     // implements the target region, but we aren't using that so that the
10392     // compiler doesn't need to keep that, and could therefore inline the host
10393     // function if proven worthwhile during optimization.
10394 
10395     // From this point on, we need to have an ID of the target region defined.
10396     assert(OutlinedFnID && "Invalid outlined function ID!");
10397     (void)OutlinedFnID;
10398 
10399     // Emit device ID if any.
10400     llvm::Value *DeviceID;
10401     if (Device.getPointer()) {
10402       assert((Device.getInt() == OMPC_DEVICE_unknown ||
10403               Device.getInt() == OMPC_DEVICE_device_num) &&
10404              "Expected device_num modifier.");
10405       llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10406       DeviceID =
10407           CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10408     } else {
10409       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10410     }
10411 
10412     // Emit the number of elements in the offloading arrays.
10413     llvm::Value *PointerNum =
10414         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10415 
10416     // Return value of the runtime offloading call.
10417     llvm::Value *Return;
10418 
10419     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
10420     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
10421 
10422     // Source location for the ident struct
10423     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10424 
10425     // Emit tripcount for the target loop-based directive.
10426     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
10427 
10428     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10429     // The target region is an outlined function launched by the runtime
10430     // via calls __tgt_target() or __tgt_target_teams().
10431     //
10432     // __tgt_target() launches a target region with one team and one thread,
10433     // executing a serial region.  This master thread may in turn launch
10434     // more threads within its team upon encountering a parallel region,
10435     // however, no additional teams can be launched on the device.
10436     //
10437     // __tgt_target_teams() launches a target region with one or more teams,
10438     // each with one or more threads.  This call is required for target
10439     // constructs such as:
10440     //  'target teams'
10441     //  'target' / 'teams'
10442     //  'target teams distribute parallel for'
10443     //  'target parallel'
10444     // and so on.
10445     //
10446     // Note that on the host and CPU targets, the runtime implementation of
10447     // these calls simply call the outlined function without forking threads.
10448     // The outlined functions themselves have runtime calls to
10449     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
10450     // the compiler in emitTeamsCall() and emitParallelCall().
10451     //
10452     // In contrast, on the NVPTX target, the implementation of
10453     // __tgt_target_teams() launches a GPU kernel with the requested number
10454     // of teams and threads so no additional calls to the runtime are required.
10455     if (NumTeams) {
10456       // If we have NumTeams defined this means that we have an enclosed teams
10457       // region. Therefore we also expect to have NumThreads defined. These two
10458       // values should be defined in the presence of a teams directive,
10459       // regardless of having any clauses associated. If the user is using teams
10460       // but no clauses, these two values will be the default that should be
10461       // passed to the runtime library - a 32-bit integer with the value zero.
10462       assert(NumThreads && "Thread limit expression should be available along "
10463                            "with number of teams.");
10464       SmallVector<llvm::Value *> OffloadingArgs = {
10465           RTLoc,
10466           DeviceID,
10467           OutlinedFnID,
10468           PointerNum,
10469           InputInfo.BasePointersArray.getPointer(),
10470           InputInfo.PointersArray.getPointer(),
10471           InputInfo.SizesArray.getPointer(),
10472           MapTypesArray,
10473           MapNamesArray,
10474           InputInfo.MappersArray.getPointer(),
10475           NumTeams,
10476           NumThreads};
10477       if (HasNowait) {
10478         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10479         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10480         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10481         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10482         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10483         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10484       }
10485       Return = CGF.EmitRuntimeCall(
10486           OMPBuilder.getOrCreateRuntimeFunction(
10487               CGM.getModule(), HasNowait
10488                                    ? OMPRTL___tgt_target_teams_nowait_mapper
10489                                    : OMPRTL___tgt_target_teams_mapper),
10490           OffloadingArgs);
10491     } else {
10492       SmallVector<llvm::Value *> OffloadingArgs = {
10493           RTLoc,
10494           DeviceID,
10495           OutlinedFnID,
10496           PointerNum,
10497           InputInfo.BasePointersArray.getPointer(),
10498           InputInfo.PointersArray.getPointer(),
10499           InputInfo.SizesArray.getPointer(),
10500           MapTypesArray,
10501           MapNamesArray,
10502           InputInfo.MappersArray.getPointer()};
10503       if (HasNowait) {
10504         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10505         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10506         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10507         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10508         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10509         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10510       }
10511       Return = CGF.EmitRuntimeCall(
10512           OMPBuilder.getOrCreateRuntimeFunction(
10513               CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
10514                                          : OMPRTL___tgt_target_mapper),
10515           OffloadingArgs);
10516     }
10517 
10518     // Check the error code and execute the host version if required.
10519     llvm::BasicBlock *OffloadFailedBlock =
10520         CGF.createBasicBlock("omp_offload.failed");
10521     llvm::BasicBlock *OffloadContBlock =
10522         CGF.createBasicBlock("omp_offload.cont");
10523     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
10524     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
10525 
10526     CGF.EmitBlock(OffloadFailedBlock);
10527     FallbackGen(CGF);
10528 
10529     CGF.EmitBranch(OffloadContBlock);
10530 
10531     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
10532   };
10533 
10534   // Notify that the host version must be executed.
10535   auto &&ElseGen = [FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
10536     FallbackGen(CGF);
10537   };
10538 
10539   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10540                           &MapNamesArray, &CapturedVars, RequiresOuterTask,
10541                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
10542     // Fill up the arrays with all the captured variables.
10543     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10544 
10545     // Get mappable expression information.
10546     MappableExprsHandler MEHandler(D, CGF);
10547     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10548     llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10549 
10550     auto RI = CS.getCapturedRecordDecl()->field_begin();
10551     auto *CV = CapturedVars.begin();
10552     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10553                                               CE = CS.capture_end();
10554          CI != CE; ++CI, ++RI, ++CV) {
10555       MappableExprsHandler::MapCombinedInfoTy CurInfo;
10556       MappableExprsHandler::StructRangeInfoTy PartialStruct;
10557 
10558       // VLA sizes are passed to the outlined region by copy and do not have map
10559       // information associated.
10560       if (CI->capturesVariableArrayType()) {
10561         CurInfo.Exprs.push_back(nullptr);
10562         CurInfo.BasePointers.push_back(*CV);
10563         CurInfo.Pointers.push_back(*CV);
10564         CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10565             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10566         // Copy to the device as an argument. No need to retrieve it.
10567         CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
10568                                 MappableExprsHandler::OMP_MAP_TARGET_PARAM |
10569                                 MappableExprsHandler::OMP_MAP_IMPLICIT);
10570         CurInfo.Mappers.push_back(nullptr);
10571       } else {
10572         // If we have any information in the map clause, we use it, otherwise we
10573         // just do a default mapping.
10574         MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
10575         if (!CI->capturesThis())
10576           MappedVarSet.insert(CI->getCapturedVar());
10577         else
10578           MappedVarSet.insert(nullptr);
10579         if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
10580           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10581         // Generate correct mapping for variables captured by reference in
10582         // lambdas.
10583         if (CI->capturesVariable())
10584           MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10585                                                   CurInfo, LambdaPointers);
10586       }
10587       // We expect to have at least an element of information for this capture.
10588       assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
10589              "Non-existing map pointer for capture!");
10590       assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10591              CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10592              CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10593              CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10594              "Inconsistent map information sizes!");
10595 
10596       // If there is an entry in PartialStruct it means we have a struct with
10597       // individual members mapped. Emit an extra combined entry.
10598       if (PartialStruct.Base.isValid()) {
10599         CombinedInfo.append(PartialStruct.PreliminaryMapData);
10600         MEHandler.emitCombinedEntry(
10601             CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
10602             !PartialStruct.PreliminaryMapData.BasePointers.empty());
10603       }
10604 
10605       // We need to append the results of this capture to what we already have.
10606       CombinedInfo.append(CurInfo);
10607     }
10608     // Adjust MEMBER_OF flags for the lambdas captures.
10609     MEHandler.adjustMemberOfForLambdaCaptures(
10610         LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
10611         CombinedInfo.Types);
10612     // Map any list items in a map clause that were not captures because they
10613     // weren't referenced within the construct.
10614     MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
10615 
10616     TargetDataInfo Info;
10617     // Fill up the arrays and create the arguments.
10618     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
10619     emitOffloadingArraysArgument(
10620         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
10621         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
10622         {/*ForEndCall=*/false});
10623 
10624     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10625     InputInfo.BasePointersArray =
10626         Address::deprecated(Info.BasePointersArray, CGM.getPointerAlign());
10627     InputInfo.PointersArray =
10628         Address::deprecated(Info.PointersArray, CGM.getPointerAlign());
10629     InputInfo.SizesArray =
10630         Address::deprecated(Info.SizesArray, CGM.getPointerAlign());
10631     InputInfo.MappersArray =
10632         Address::deprecated(Info.MappersArray, CGM.getPointerAlign());
10633     MapTypesArray = Info.MapTypesArray;
10634     MapNamesArray = Info.MapNamesArray;
10635     if (RequiresOuterTask)
10636       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10637     else
10638       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10639   };
10640 
10641   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10642                              CodeGenFunction &CGF, PrePostActionTy &) {
10643     if (RequiresOuterTask) {
10644       CodeGenFunction::OMPTargetDataInfo InputInfo;
10645       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10646     } else {
10647       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10648     }
10649   };
10650 
10651   // If we have a target function ID it means that we need to support
10652   // offloading, otherwise, just execute on the host. We need to execute on host
10653   // regardless of the conditional in the if clause if, e.g., the user do not
10654   // specify target triples.
10655   if (OutlinedFnID) {
10656     if (IfCond) {
10657       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10658     } else {
10659       RegionCodeGenTy ThenRCG(TargetThenGen);
10660       ThenRCG(CGF);
10661     }
10662   } else {
10663     RegionCodeGenTy ElseRCG(TargetElseGen);
10664     ElseRCG(CGF);
10665   }
10666 }
10667 
/// Recursively walk \p S looking for OpenMP target execution directives and
/// emit the device function for each one found. \p ParentName is the mangled
/// name of the enclosing host function/ctor/dtor and participates in the
/// unique kernel-name mangling.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // Compute the unique (device, file, line) triple that identifies this
    // target region's entry point.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the directive-specific device-function emitter.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // None of the remaining directives is a target entry point; reaching this
    // switch with one of them would be a bug in the caller, since
    // isOpenMPTargetExecutionDirective() returned true above.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    // For non-target directives, recurse into the (raw) associated statement
    // rather than the directive's children.
    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
10818 
10819 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10820   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10821       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10822   if (!DevTy)
10823     return false;
10824   // Do not emit device_type(nohost) functions for the host.
10825   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10826     return true;
10827   // Do not emit device_type(host) functions for the device.
10828   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10829     return true;
10830   return false;
10831 }
10832 
10833 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10834   // If emitting code for the host, we do not process FD here. Instead we do
10835   // the normal code generation.
10836   if (!CGM.getLangOpts().OpenMPIsDevice) {
10837     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10838       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10839                                   CGM.getLangOpts().OpenMPIsDevice))
10840         return true;
10841     return false;
10842   }
10843 
10844   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10845   // Try to detect target regions in the function.
10846   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10847     StringRef Name = CGM.getMangledName(GD);
10848     scanForTargetRegionsFunctions(FD->getBody(), Name);
10849     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10850                                 CGM.getLangOpts().OpenMPIsDevice))
10851       return true;
10852   }
10853 
10854   // Do not to emit function if it is not marked as declare target.
10855   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10856          AlreadyEmittedTargetDecls.count(VD) == 0;
10857 }
10858 
10859 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10860   if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10861                               CGM.getLangOpts().OpenMPIsDevice))
10862     return true;
10863 
10864   if (!CGM.getLangOpts().OpenMPIsDevice)
10865     return false;
10866 
10867   // Check if there are Ctors/Dtors in this declaration and look for target
10868   // regions in it. We use the complete variant to produce the kernel name
10869   // mangling.
10870   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10871   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10872     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10873       StringRef ParentName =
10874           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10875       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10876     }
10877     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10878       StringRef ParentName =
10879           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10880       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10881     }
10882   }
10883 
10884   // Do not to emit variable if it is not marked as declare target.
10885   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10886       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10887           cast<VarDecl>(GD.getDecl()));
10888   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10889       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10890        HasRequiresUnifiedSharedMemory)) {
10891     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10892     return true;
10893   }
10894   return false;
10895 }
10896 
10897 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10898                                                    llvm::Constant *Addr) {
10899   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10900       !CGM.getLangOpts().OpenMPIsDevice)
10901     return;
10902 
10903   // If we have host/nohost variables, they do not need to be registered.
10904   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10905       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10906   if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
10907     return;
10908 
10909   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10910       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10911   if (!Res) {
10912     if (CGM.getLangOpts().OpenMPIsDevice) {
10913       // Register non-target variables being emitted in device code (debug info
10914       // may cause this).
10915       StringRef VarName = CGM.getMangledName(VD);
10916       EmittedNonTargetVariables.try_emplace(VarName, Addr);
10917     }
10918     return;
10919   }
10920   // Register declare target variables.
10921   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10922   StringRef VarName;
10923   CharUnits VarSize;
10924   llvm::GlobalValue::LinkageTypes Linkage;
10925 
10926   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10927       !HasRequiresUnifiedSharedMemory) {
10928     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10929     VarName = CGM.getMangledName(VD);
10930     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10931       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10932       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10933     } else {
10934       VarSize = CharUnits::Zero();
10935     }
10936     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10937     // Temp solution to prevent optimizations of the internal variables.
10938     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10939       // Do not create a "ref-variable" if the original is not also available
10940       // on the host.
10941       if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
10942         return;
10943       std::string RefName = getName({VarName, "ref"});
10944       if (!CGM.GetGlobalValue(RefName)) {
10945         llvm::Constant *AddrRef =
10946             getOrCreateInternalVariable(Addr->getType(), RefName);
10947         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10948         GVAddrRef->setConstant(/*Val=*/true);
10949         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10950         GVAddrRef->setInitializer(Addr);
10951         CGM.addCompilerUsedGlobal(GVAddrRef);
10952       }
10953     }
10954   } else {
10955     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10956             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10957              HasRequiresUnifiedSharedMemory)) &&
10958            "Declare target attribute must link or to with unified memory.");
10959     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10960       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10961     else
10962       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10963 
10964     if (CGM.getLangOpts().OpenMPIsDevice) {
10965       VarName = Addr->getName();
10966       Addr = nullptr;
10967     } else {
10968       VarName = getAddrOfDeclareTargetVar(VD).getName();
10969       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10970     }
10971     VarSize = CGM.getPointerSize();
10972     Linkage = llvm::GlobalValue::WeakAnyLinkage;
10973   }
10974 
10975   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10976       VarName, Addr, VarSize, Flags, Linkage);
10977 }
10978 
10979 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10980   if (isa<FunctionDecl>(GD.getDecl()) ||
10981       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10982     return emitTargetFunctions(GD);
10983 
10984   return emitTargetGlobalVariable(GD);
10985 }
10986 
10987 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10988   for (const VarDecl *VD : DeferredGlobalVariables) {
10989     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10990         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10991     if (!Res)
10992       continue;
10993     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10994         !HasRequiresUnifiedSharedMemory) {
10995       CGM.EmitGlobal(VD);
10996     } else {
10997       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10998               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10999                HasRequiresUnifiedSharedMemory)) &&
11000              "Expected link clause or to clause with unified memory.");
11001       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
11002     }
11003   }
11004 }
11005 
11006 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
11007     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
11008   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
11009          " Expected target-based directive.");
11010 }
11011 
11012 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
11013   for (const OMPClause *Clause : D->clauselists()) {
11014     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
11015       HasRequiresUnifiedSharedMemory = true;
11016     } else if (const auto *AC =
11017                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
11018       switch (AC->getAtomicDefaultMemOrderKind()) {
11019       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
11020         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
11021         break;
11022       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
11023         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
11024         break;
11025       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
11026         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
11027         break;
11028       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
11029         break;
11030       }
11031     }
11032   }
11033 }
11034 
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  // Default atomic ordering, as selected by a 'requires
  // atomic_default_mem_order' clause (see processRequiresDirective).
  return RequiresAtomicOrdering;
}
11038 
11039 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
11040                                                        LangAS &AS) {
11041   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
11042     return false;
11043   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
11044   switch(A->getAllocatorType()) {
11045   case OMPAllocateDeclAttr::OMPNullMemAlloc:
11046   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
11047   // Not supported, fallback to the default mem space.
11048   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
11049   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
11050   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
11051   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
11052   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
11053   case OMPAllocateDeclAttr::OMPConstMemAlloc:
11054   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
11055     AS = LangAS::Default;
11056     return true;
11057   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
11058     llvm_unreachable("Expected predefined allocator for the variables with the "
11059                      "static storage.");
11060   }
11061   return false;
11062 }
11063 
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  // True once a 'requires unified_shared_memory' clause has been processed
  // (see processRequiresDirective).
  return HasRequiresUnifiedSharedMemory;
}
11067 
11068 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
11069     CodeGenModule &CGM)
11070     : CGM(CGM) {
11071   if (CGM.getLangOpts().OpenMPIsDevice) {
11072     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
11073     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
11074   }
11075 }
11076 
11077 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
11078   if (CGM.getLangOpts().OpenMPIsDevice)
11079     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
11080 }
11081 
11082 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
11083   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
11084     return true;
11085 
11086   const auto *D = cast<FunctionDecl>(GD.getDecl());
11087   // Do not to emit function if it is marked as declare target as it was already
11088   // emitted.
11089   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
11090     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
11091       if (auto *F = dyn_cast_or_null<llvm::Function>(
11092               CGM.GetGlobalValue(CGM.getMangledName(GD))))
11093         return !F->isDeclaration();
11094       return false;
11095     }
11096     return true;
11097   }
11098 
11099   return !AlreadyEmittedTargetDecls.insert(D).second;
11100 }
11101 
/// Creates the host-side registration function for 'requires' clauses, or
/// returns nullptr when no registration is needed. The emitted function calls
/// __tgt_register_requires with the collected flags.
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    // Build a void() function named "omp_offloading.requires_reg" suitable for
    // running as a global initializer/cleanup hook.
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    // Body: __tgt_register_requires(Flags).
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
11143 
11144 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
11145                                     const OMPExecutableDirective &D,
11146                                     SourceLocation Loc,
11147                                     llvm::Function *OutlinedFn,
11148                                     ArrayRef<llvm::Value *> CapturedVars) {
11149   if (!CGF.HaveInsertPoint())
11150     return;
11151 
11152   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11153   CodeGenFunction::RunCleanupsScope Scope(CGF);
11154 
11155   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11156   llvm::Value *Args[] = {
11157       RTLoc,
11158       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11159       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
11160   llvm::SmallVector<llvm::Value *, 16> RealArgs;
11161   RealArgs.append(std::begin(Args), std::end(Args));
11162   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11163 
11164   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11165       CGM.getModule(), OMPRTL___kmpc_fork_teams);
11166   CGF.EmitRuntimeCall(RTLFn, RealArgs);
11167 }
11168 
11169 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11170                                          const Expr *NumTeams,
11171                                          const Expr *ThreadLimit,
11172                                          SourceLocation Loc) {
11173   if (!CGF.HaveInsertPoint())
11174     return;
11175 
11176   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11177 
11178   llvm::Value *NumTeamsVal =
11179       NumTeams
11180           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
11181                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11182           : CGF.Builder.getInt32(0);
11183 
11184   llvm::Value *ThreadLimitVal =
11185       ThreadLimit
11186           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11187                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11188           : CGF.Builder.getInt32(0);
11189 
11190   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
11191   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
11192                                      ThreadLimitVal};
11193   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11194                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
11195                       PushNumTeamsArgs);
11196 }
11197 
11198 void CGOpenMPRuntime::emitTargetDataCalls(
11199     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11200     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
11201   if (!CGF.HaveInsertPoint())
11202     return;
11203 
11204   // Action used to replace the default codegen action and turn privatization
11205   // off.
11206   PrePostActionTy NoPrivAction;
11207 
11208   // Generate the code for the opening of the data environment. Capture all the
11209   // arguments of the runtime call by reference because they are used in the
11210   // closing of the region.
11211   auto &&BeginThenGen = [this, &D, Device, &Info,
11212                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
11213     // Fill up the arrays with all the mapped variables.
11214     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11215 
11216     // Get map clause information.
11217     MappableExprsHandler MEHandler(D, CGF);
11218     MEHandler.generateAllInfo(CombinedInfo);
11219 
11220     // Fill up the arrays and create the arguments.
11221     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11222                          /*IsNonContiguous=*/true);
11223 
11224     llvm::Value *BasePointersArrayArg = nullptr;
11225     llvm::Value *PointersArrayArg = nullptr;
11226     llvm::Value *SizesArrayArg = nullptr;
11227     llvm::Value *MapTypesArrayArg = nullptr;
11228     llvm::Value *MapNamesArrayArg = nullptr;
11229     llvm::Value *MappersArrayArg = nullptr;
11230     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11231                                  SizesArrayArg, MapTypesArrayArg,
11232                                  MapNamesArrayArg, MappersArrayArg, Info);
11233 
11234     // Emit device ID if any.
11235     llvm::Value *DeviceID = nullptr;
11236     if (Device) {
11237       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11238                                            CGF.Int64Ty, /*isSigned=*/true);
11239     } else {
11240       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11241     }
11242 
11243     // Emit the number of elements in the offloading arrays.
11244     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11245     //
11246     // Source location for the ident struct
11247     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11248 
11249     llvm::Value *OffloadingArgs[] = {RTLoc,
11250                                      DeviceID,
11251                                      PointerNum,
11252                                      BasePointersArrayArg,
11253                                      PointersArrayArg,
11254                                      SizesArrayArg,
11255                                      MapTypesArrayArg,
11256                                      MapNamesArrayArg,
11257                                      MappersArrayArg};
11258     CGF.EmitRuntimeCall(
11259         OMPBuilder.getOrCreateRuntimeFunction(
11260             CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
11261         OffloadingArgs);
11262 
11263     // If device pointer privatization is required, emit the body of the region
11264     // here. It will have to be duplicated: with and without privatization.
11265     if (!Info.CaptureDeviceAddrMap.empty())
11266       CodeGen(CGF);
11267   };
11268 
11269   // Generate code for the closing of the data region.
11270   auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
11271                                                 PrePostActionTy &) {
11272     assert(Info.isValid() && "Invalid data environment closing arguments.");
11273 
11274     llvm::Value *BasePointersArrayArg = nullptr;
11275     llvm::Value *PointersArrayArg = nullptr;
11276     llvm::Value *SizesArrayArg = nullptr;
11277     llvm::Value *MapTypesArrayArg = nullptr;
11278     llvm::Value *MapNamesArrayArg = nullptr;
11279     llvm::Value *MappersArrayArg = nullptr;
11280     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11281                                  SizesArrayArg, MapTypesArrayArg,
11282                                  MapNamesArrayArg, MappersArrayArg, Info,
11283                                  {/*ForEndCall=*/true});
11284 
11285     // Emit device ID if any.
11286     llvm::Value *DeviceID = nullptr;
11287     if (Device) {
11288       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11289                                            CGF.Int64Ty, /*isSigned=*/true);
11290     } else {
11291       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11292     }
11293 
11294     // Emit the number of elements in the offloading arrays.
11295     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11296 
11297     // Source location for the ident struct
11298     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11299 
11300     llvm::Value *OffloadingArgs[] = {RTLoc,
11301                                      DeviceID,
11302                                      PointerNum,
11303                                      BasePointersArrayArg,
11304                                      PointersArrayArg,
11305                                      SizesArrayArg,
11306                                      MapTypesArrayArg,
11307                                      MapNamesArrayArg,
11308                                      MappersArrayArg};
11309     CGF.EmitRuntimeCall(
11310         OMPBuilder.getOrCreateRuntimeFunction(
11311             CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
11312         OffloadingArgs);
11313   };
11314 
11315   // If we need device pointer privatization, we need to emit the body of the
11316   // region with no privatization in the 'else' branch of the conditional.
11317   // Otherwise, we don't have to do anything.
11318   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
11319                                                          PrePostActionTy &) {
11320     if (!Info.CaptureDeviceAddrMap.empty()) {
11321       CodeGen.setAction(NoPrivAction);
11322       CodeGen(CGF);
11323     }
11324   };
11325 
11326   // We don't have to do anything to close the region if the if clause evaluates
11327   // to false.
11328   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
11329 
11330   if (IfCond) {
11331     emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
11332   } else {
11333     RegionCodeGenTy RCG(BeginThenGen);
11334     RCG(CGF);
11335   }
11336 
11337   // If we don't require privatization of device pointers, we emit the body in
11338   // between the runtime calls. This avoids duplicating the body code.
11339   if (Info.CaptureDeviceAddrMap.empty()) {
11340     CodeGen.setAction(NoPrivAction);
11341     CodeGen(CGF);
11342   }
11343 
11344   if (IfCond) {
11345     emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
11346   } else {
11347     RegionCodeGenTy RCG(EndThenGen);
11348     RCG(CGF);
11349   }
11350 }
11351 
/// Emit the runtime call for a standalone target data directive: 'target
/// enter data', 'target exit data', or 'target update'. Builds the offloading
/// argument arrays, selects the matching __tgt_target_data_* runtime entry
/// (nowait variant when a 'nowait' clause is present), and wraps the call in
/// an outer task when 'depend'/'nowait' clauses require it.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  // Nothing to emit if the insertion point has been cleared (e.g. we are in
  // unreachable code).
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // State shared between the two lambdas below: TargetThenGen fills these in
  // and ThenGen consumes them (possibly from within a generated task body).
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No 'device' clause: use the "undefined device" sentinel and let the
      // runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Argument list shared by all __tgt_target_data_*_mapper entry points.
    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // All remaining directive kinds are listed explicitly (rather than under
    // 'default' alone) so that adding a new OpenMP directive produces a
    // -Wswitch warning here; none of them can reach this function per the
    // assertion above.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Materialize the offloading arrays from the map clauses, then dispatch to
  // ThenGen either directly (inlined) or through a task-based directive when
  // 'depend'/'nowait' requires an outer task.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});
    // Publish the arrays through InputInfo / the captured pointers so ThenGen
    // can reference them, even when it runs inside a generated task.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address::deprecated(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address::deprecated(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address::deprecated(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address::deprecated(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // With an 'if' clause the runtime call is guarded; the 'else' branch emits
  // nothing. Without one, emit the call unconditionally.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
11532 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Classification of the parameter; defaults to Vector (the mangling
    /// letter 'v') when no clause marks it otherwise.
    ParamKindTy Kind = Vector;
    /// For Linear, the (constant) stride; for LinearWithVarStride, the value
    /// mangled after 'ls'/'s' — presumably the position of the argument
    /// carrying the stride (set by callers outside this chunk; verify there).
    llvm::APSInt StrideOrArg;
    /// Alignment from an 'aligned' clause; zero/empty means "none given".
    llvm::APSInt Alignment;
  };
} // namespace
11543 
11544 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11545                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11546   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11547   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11548   // of that clause. The VLEN value must be power of 2.
11549   // In other case the notion of the function`s "characteristic data type" (CDT)
11550   // is used to compute the vector length.
11551   // CDT is defined in the following order:
11552   //   a) For non-void function, the CDT is the return type.
11553   //   b) If the function has any non-uniform, non-linear parameters, then the
11554   //   CDT is the type of the first such parameter.
11555   //   c) If the CDT determined by a) or b) above is struct, union, or class
11556   //   type which is pass-by-value (except for the type that maps to the
11557   //   built-in complex data type), the characteristic data type is int.
11558   //   d) If none of the above three cases is applicable, the CDT is int.
11559   // The VLEN is then determined based on the CDT and the size of vector
11560   // register of that ISA for which current vector version is generated. The
11561   // VLEN is computed using the formula below:
11562   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11563   // where vector register size specified in section 3.2.1 Registers and the
11564   // Stack Frame of original AMD64 ABI document.
11565   QualType RetType = FD->getReturnType();
11566   if (RetType.isNull())
11567     return 0;
11568   ASTContext &C = FD->getASTContext();
11569   QualType CDT;
11570   if (!RetType.isNull() && !RetType->isVoidType()) {
11571     CDT = RetType;
11572   } else {
11573     unsigned Offset = 0;
11574     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11575       if (ParamAttrs[Offset].Kind == Vector)
11576         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11577       ++Offset;
11578     }
11579     if (CDT.isNull()) {
11580       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11581         if (ParamAttrs[I + Offset].Kind == Vector) {
11582           CDT = FD->getParamDecl(I)->getType();
11583           break;
11584         }
11585       }
11586     }
11587   }
11588   if (CDT.isNull())
11589     CDT = C.IntTy;
11590   CDT = CDT->getCanonicalTypeUnqualified();
11591   if (CDT->isRecordType() || CDT->isUnionType())
11592     CDT = C.IntTy;
11593   return C.getTypeSize(CDT);
11594 }
11595 
11596 static void
11597 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11598                            const llvm::APSInt &VLENVal,
11599                            ArrayRef<ParamAttrTy> ParamAttrs,
11600                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11601   struct ISADataTy {
11602     char ISA;
11603     unsigned VecRegSize;
11604   };
11605   ISADataTy ISAData[] = {
11606       {
11607           'b', 128
11608       }, // SSE
11609       {
11610           'c', 256
11611       }, // AVX
11612       {
11613           'd', 256
11614       }, // AVX2
11615       {
11616           'e', 512
11617       }, // AVX512
11618   };
11619   llvm::SmallVector<char, 2> Masked;
11620   switch (State) {
11621   case OMPDeclareSimdDeclAttr::BS_Undefined:
11622     Masked.push_back('N');
11623     Masked.push_back('M');
11624     break;
11625   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11626     Masked.push_back('N');
11627     break;
11628   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11629     Masked.push_back('M');
11630     break;
11631   }
11632   for (char Mask : Masked) {
11633     for (const ISADataTy &Data : ISAData) {
11634       SmallString<256> Buffer;
11635       llvm::raw_svector_ostream Out(Buffer);
11636       Out << "_ZGV" << Data.ISA << Mask;
11637       if (!VLENVal) {
11638         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11639         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11640         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11641       } else {
11642         Out << VLENVal;
11643       }
11644       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11645         switch (ParamAttr.Kind){
11646         case LinearWithVarStride:
11647           Out << 's' << ParamAttr.StrideOrArg;
11648           break;
11649         case Linear:
11650           Out << 'l';
11651           if (ParamAttr.StrideOrArg != 1)
11652             Out << ParamAttr.StrideOrArg;
11653           break;
11654         case Uniform:
11655           Out << 'u';
11656           break;
11657         case Vector:
11658           Out << 'v';
11659           break;
11660         }
11661         if (!!ParamAttr.Alignment)
11662           Out << 'a' << ParamAttr.Alignment;
11663       }
11664       Out << '_' << Fn->getName();
11665       Fn->addFnAttr(Out.str());
11666     }
11667   }
11668 }
11669 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined
// in the "Vector Function ABI specifications for AArch64", available
// at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11675 
11676 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11677 ///
11678 /// TODO: Need to implement the behavior for reference marked with a
11679 /// var or no linear modifiers (1.b in the section). For this, we
11680 /// need to extend ParamKindTy to support the linear modifiers.
11681 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11682   QT = QT.getCanonicalType();
11683 
11684   if (QT->isVoidType())
11685     return false;
11686 
11687   if (Kind == ParamKindTy::Uniform)
11688     return false;
11689 
11690   if (Kind == ParamKindTy::Linear)
11691     return false;
11692 
11693   // TODO: Handle linear references with modifiers
11694 
11695   if (Kind == ParamKindTy::LinearWithVarStride)
11696     return false;
11697 
11698   return true;
11699 }
11700 
11701 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11702 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11703   QT = QT.getCanonicalType();
11704   unsigned Size = C.getTypeSize(QT);
11705 
11706   // Only scalars and complex within 16 bytes wide set PVB to true.
11707   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11708     return false;
11709 
11710   if (QT->isFloatingType())
11711     return true;
11712 
11713   if (QT->isIntegerType())
11714     return true;
11715 
11716   if (QT->isPointerType())
11717     return true;
11718 
11719   // TODO: Add support for complex types (section 3.1.2, item 2).
11720 
11721   return false;
11722 }
11723 
11724 /// Computes the lane size (LS) of a return type or of an input parameter,
11725 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11726 /// TODO: Add support for references, section 3.2.1, item 1.
11727 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11728   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11729     QualType PTy = QT.getCanonicalType()->getPointeeType();
11730     if (getAArch64PBV(PTy, C))
11731       return C.getTypeSize(PTy);
11732   }
11733   if (getAArch64PBV(QT, C))
11734     return C.getTypeSize(QT);
11735 
11736   return C.getTypeSize(C.getUIntPtrType());
11737 }
11738 
11739 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11740 // signature of the scalar function, as defined in 3.2.2 of the
11741 // AAVFABI.
11742 static std::tuple<unsigned, unsigned, bool>
11743 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11744   QualType RetType = FD->getReturnType().getCanonicalType();
11745 
11746   ASTContext &C = FD->getASTContext();
11747 
11748   bool OutputBecomesInput = false;
11749 
11750   llvm::SmallVector<unsigned, 8> Sizes;
11751   if (!RetType->isVoidType()) {
11752     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11753     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11754       OutputBecomesInput = true;
11755   }
11756   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11757     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11758     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11759   }
11760 
11761   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11762   // The LS of a function parameter / return value can only be a power
11763   // of 2, starting from 8 bits, up to 128.
11764   assert(llvm::all_of(Sizes,
11765                       [](unsigned Size) {
11766                         return Size == 8 || Size == 16 || Size == 32 ||
11767                                Size == 64 || Size == 128;
11768                       }) &&
11769          "Invalid size");
11770 
11771   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11772                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11773                          OutputBecomesInput);
11774 }
11775 
11776 /// Mangle the parameter part of the vector function name according to
11777 /// their OpenMP classification. The mangling function is defined in
11778 /// section 3.5 of the AAVFABI.
11779 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11780   SmallString<256> Buffer;
11781   llvm::raw_svector_ostream Out(Buffer);
11782   for (const auto &ParamAttr : ParamAttrs) {
11783     switch (ParamAttr.Kind) {
11784     case LinearWithVarStride:
11785       Out << "ls" << ParamAttr.StrideOrArg;
11786       break;
11787     case Linear:
11788       Out << 'l';
11789       // Don't print the step value if it is not present or if it is
11790       // equal to 1.
11791       if (ParamAttr.StrideOrArg != 1)
11792         Out << ParamAttr.StrideOrArg;
11793       break;
11794     case Uniform:
11795       Out << 'u';
11796       break;
11797     case Vector:
11798       Out << 'v';
11799       break;
11800     }
11801 
11802     if (!!ParamAttr.Alignment)
11803       Out << 'a' << ParamAttr.Alignment;
11804   }
11805 
11806   return std::string(Out.str());
11807 }
11808 
11809 // Function used to add the attribute. The parameter `VLEN` is
11810 // templated to allow the use of "x" when targeting scalable functions
11811 // for SVE.
11812 template <typename T>
11813 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11814                                  char ISA, StringRef ParSeq,
11815                                  StringRef MangledName, bool OutputBecomesInput,
11816                                  llvm::Function *Fn) {
11817   SmallString<256> Buffer;
11818   llvm::raw_svector_ostream Out(Buffer);
11819   Out << Prefix << ISA << LMask << VLEN;
11820   if (OutputBecomesInput)
11821     Out << "v";
11822   Out << ParSeq << "_" << MangledName;
11823   Fn->addFnAttr(Out.str());
11824 }
11825 
11826 // Helper function to generate the Advanced SIMD names depending on
11827 // the value of the NDS when simdlen is not present.
11828 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11829                                       StringRef Prefix, char ISA,
11830                                       StringRef ParSeq, StringRef MangledName,
11831                                       bool OutputBecomesInput,
11832                                       llvm::Function *Fn) {
11833   switch (NDS) {
11834   case 8:
11835     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11836                          OutputBecomesInput, Fn);
11837     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11838                          OutputBecomesInput, Fn);
11839     break;
11840   case 16:
11841     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11842                          OutputBecomesInput, Fn);
11843     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11844                          OutputBecomesInput, Fn);
11845     break;
11846   case 32:
11847     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11848                          OutputBecomesInput, Fn);
11849     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11850                          OutputBecomesInput, Fn);
11851     break;
11852   case 64:
11853   case 128:
11854     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11855                          OutputBecomesInput, Fn);
11856     break;
11857   default:
11858     llvm_unreachable("Scalar type is too wide.");
11859   }
11860 }
11861 
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
/// Validates any user-provided 'simdlen' (\p UserVLEN, 0 if absent) against
/// the ISA constraints, emitting a warning and bailing out on violation;
/// otherwise attaches one mangled-name attribute per generated variant.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        // No branch-state clause: emit both the unmasked ("N") and masked
        // ("M") variants.
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1: scalable vector length, mangled "x".
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`. The VLENs themselves are derived from the NDS.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
11970 
11971 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11972                                               llvm::Function *Fn) {
11973   ASTContext &C = CGM.getContext();
11974   FD = FD->getMostRecentDecl();
11975   // Map params to their positions in function decl.
11976   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11977   if (isa<CXXMethodDecl>(FD))
11978     ParamPositions.try_emplace(FD, 0);
11979   unsigned ParamPos = ParamPositions.size();
11980   for (const ParmVarDecl *P : FD->parameters()) {
11981     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11982     ++ParamPos;
11983   }
11984   while (FD) {
11985     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11986       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11987       // Mark uniform parameters.
11988       for (const Expr *E : Attr->uniforms()) {
11989         E = E->IgnoreParenImpCasts();
11990         unsigned Pos;
11991         if (isa<CXXThisExpr>(E)) {
11992           Pos = ParamPositions[FD];
11993         } else {
11994           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11995                                 ->getCanonicalDecl();
11996           Pos = ParamPositions[PVD];
11997         }
11998         ParamAttrs[Pos].Kind = Uniform;
11999       }
12000       // Get alignment info.
12001       auto *NI = Attr->alignments_begin();
12002       for (const Expr *E : Attr->aligneds()) {
12003         E = E->IgnoreParenImpCasts();
12004         unsigned Pos;
12005         QualType ParmTy;
12006         if (isa<CXXThisExpr>(E)) {
12007           Pos = ParamPositions[FD];
12008           ParmTy = E->getType();
12009         } else {
12010           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12011                                 ->getCanonicalDecl();
12012           Pos = ParamPositions[PVD];
12013           ParmTy = PVD->getType();
12014         }
12015         ParamAttrs[Pos].Alignment =
12016             (*NI)
12017                 ? (*NI)->EvaluateKnownConstInt(C)
12018                 : llvm::APSInt::getUnsigned(
12019                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
12020                           .getQuantity());
12021         ++NI;
12022       }
12023       // Mark linear parameters.
12024       auto *SI = Attr->steps_begin();
12025       auto *MI = Attr->modifiers_begin();
12026       for (const Expr *E : Attr->linears()) {
12027         E = E->IgnoreParenImpCasts();
12028         unsigned Pos;
12029         // Rescaling factor needed to compute the linear parameter
12030         // value in the mangled name.
12031         unsigned PtrRescalingFactor = 1;
12032         if (isa<CXXThisExpr>(E)) {
12033           Pos = ParamPositions[FD];
12034         } else {
12035           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12036                                 ->getCanonicalDecl();
12037           Pos = ParamPositions[PVD];
12038           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
12039             PtrRescalingFactor = CGM.getContext()
12040                                      .getTypeSizeInChars(P->getPointeeType())
12041                                      .getQuantity();
12042         }
12043         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
12044         ParamAttr.Kind = Linear;
12045         // Assuming a stride of 1, for `linear` without modifiers.
12046         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
12047         if (*SI) {
12048           Expr::EvalResult Result;
12049           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
12050             if (const auto *DRE =
12051                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
12052               if (const auto *StridePVD =
12053                       dyn_cast<ParmVarDecl>(DRE->getDecl())) {
12054                 ParamAttr.Kind = LinearWithVarStride;
12055                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
12056                     ParamPositions[StridePVD->getCanonicalDecl()]);
12057               }
12058             }
12059           } else {
12060             ParamAttr.StrideOrArg = Result.Val.getInt();
12061           }
12062         }
12063         // If we are using a linear clause on a pointer, we need to
12064         // rescale the value of linear_step with the byte size of the
12065         // pointee type.
12066         if (Linear == ParamAttr.Kind)
12067           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
12068         ++SI;
12069         ++MI;
12070       }
12071       llvm::APSInt VLENVal;
12072       SourceLocation ExprLoc;
12073       const Expr *VLENExpr = Attr->getSimdlen();
12074       if (VLENExpr) {
12075         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
12076         ExprLoc = VLENExpr->getExprLoc();
12077       }
12078       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
12079       if (CGM.getTriple().isX86()) {
12080         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
12081       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
12082         unsigned VLEN = VLENVal.getExtValue();
12083         StringRef MangledName = Fn->getName();
12084         if (CGM.getTarget().hasFeature("sve"))
12085           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12086                                          MangledName, 's', 128, Fn, ExprLoc);
12087         if (CGM.getTarget().hasFeature("neon"))
12088           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12089                                          MangledName, 'n', 128, Fn, ExprLoc);
12090       }
12091     }
12092     FD = FD->getPreviousDecl();
12093   }
12094 }
12095 
12096 namespace {
12097 /// Cleanup action for doacross support.
12098 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
12099 public:
12100   static const int DoacrossFinArgs = 2;
12101 
12102 private:
12103   llvm::FunctionCallee RTLFn;
12104   llvm::Value *Args[DoacrossFinArgs];
12105 
12106 public:
12107   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
12108                     ArrayRef<llvm::Value *> CallArgs)
12109       : RTLFn(RTLFn) {
12110     assert(CallArgs.size() == DoacrossFinArgs);
12111     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
12112   }
12113   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12114     if (!CGF.HaveInsertPoint())
12115       return;
12116     CGF.EmitRuntimeCall(RTLFn, Args);
12117   }
12118 };
12119 } // namespace
12120 
/// Emits initialization of a doacross loop nest: materializes one kmp_dim
/// descriptor per entry of \p NumIterations (lower bound left 0, upper bound
/// = number of iterations, stride 1, all converted to kmp_int64) and calls
/// __kmpc_doacross_init. Also pushes a cleanup that emits
/// __kmpc_doacross_fini when the region is left.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // Build the kmp_dim record type once and cache it in KmpDimTy.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // Zero-initialize the array, so the 'lo' field of every dim stays 0.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Arrange for __kmpc_doacross_fini(loc, gtid) to run on region exit,
  // normal or exceptional.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
12191 
12192 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12193                                           const OMPDependClause *C) {
12194   QualType Int64Ty =
12195       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
12196   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
12197   QualType ArrayTy = CGM.getContext().getConstantArrayType(
12198       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
12199   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
12200   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
12201     const Expr *CounterVal = C->getLoopData(I);
12202     assert(CounterVal);
12203     llvm::Value *CntVal = CGF.EmitScalarConversion(
12204         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
12205         CounterVal->getExprLoc());
12206     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
12207                           /*Volatile=*/false, Int64Ty);
12208   }
12209   llvm::Value *Args[] = {
12210       emitUpdateLocation(CGF, C->getBeginLoc()),
12211       getThreadID(CGF, C->getBeginLoc()),
12212       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
12213   llvm::FunctionCallee RTLFn;
12214   if (C->getDependencyKind() == OMPC_DEPEND_source) {
12215     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12216                                                   OMPRTL___kmpc_doacross_post);
12217   } else {
12218     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
12219     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12220                                                   OMPRTL___kmpc_doacross_wait);
12221   }
12222   CGF.EmitRuntimeCall(RTLFn, Args);
12223 }
12224 
12225 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12226                                llvm::FunctionCallee Callee,
12227                                ArrayRef<llvm::Value *> Args) const {
12228   assert(Loc.isValid() && "Outlined function call location must be valid.");
12229   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
12230 
12231   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12232     if (Fn->doesNotThrow()) {
12233       CGF.EmitNounwindRuntimeCall(Fn, Args);
12234       return;
12235     }
12236   }
12237   CGF.EmitRuntimeCall(Callee, Args);
12238 }
12239 
/// Emits a call to the outlined function \p OutlinedFn. In this base runtime
/// it simply forwards to emitCall; kept as a separate entry point so derived
/// runtimes can specialize outlined-function invocation.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
12245 
12246 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12247   if (const auto *FD = dyn_cast<FunctionDecl>(D))
12248     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12249       HasEmittedDeclareTargetRegion = true;
12250 }
12251 
/// Returns the address of the local variable for \p NativeParam.
/// \p TargetParam is intentionally unused in this base implementation.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
12257 
12258 /// Return allocator value from expression, or return a null allocator (default
12259 /// when no allocator specified).
12260 static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
12261                                     const Expr *Allocator) {
12262   llvm::Value *AllocVal;
12263   if (Allocator) {
12264     AllocVal = CGF.EmitScalarExpr(Allocator);
12265     // According to the standard, the original allocator type is a enum
12266     // (integer). Convert to pointer type, if required.
12267     AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
12268                                         CGF.getContext().VoidPtrTy,
12269                                         Allocator->getExprLoc());
12270   } else {
12271     // If no allocator specified, it defaults to the null allocator.
12272     AllocVal = llvm::Constant::getNullValue(
12273         CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
12274   }
12275   return AllocVal;
12276 }
12277 
/// Returns the address to use for local variable \p VD inside the current
/// function. Handles two special cases: variables recorded for an untied
/// task (their stack-slot/real addresses come from the untied-locals stack)
/// and variables with an 'omp allocate' attribute, which are allocated via
/// the __kmpc_alloc/__kmpc_aligned_alloc runtime entries with a matching
/// __kmpc_free cleanup. Returns Address::invalid() when no special handling
/// applies.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  // If the current function is an untied task, look up any addresses that
  // were recorded for this variable on the untied-locals stack.
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // Variably-modified type: size is only known at runtime.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      // Constant-size type: compute the aligned size at compile time.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    // Optional explicit alignment from the allocate attribute, as size_t.
    llvm::Value *Alignment =
        AA->getAlignment()
            ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(AA->getAlignment()),
                                        CGM.SizeTy, /*isSigned=*/false)
            : nullptr;
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    // With an explicit alignment use __kmpc_aligned_alloc (extra alignment
    // argument), otherwise plain __kmpc_alloc.
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // For untied tasks, store the allocated pointer into the recorded slot.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;           // __kmpc_free
      SourceLocation::UIntTy LocEncoding;   // Where to emit the free call.
      Address Addr;                         // Allocation to release.
      const Expr *AllocExpr;                // Allocator used for the alloc.

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        // __kmpc_free(gtid, ptr, allocator)
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr = UntiedRealAddr.isValid()
                         ? UntiedRealAddr
                         : Address::deprecated(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
12380 
12381 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12382                                              const VarDecl *VD) const {
12383   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12384   if (It == FunctionToUntiedTaskStackMap.end())
12385     return false;
12386   return UntiedLocalVarsStack[It->second].count(VD) > 0;
12387 }
12388 
12389 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12390     CodeGenModule &CGM, const OMPLoopDirective &S)
12391     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12392   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12393   if (!NeedToPush)
12394     return;
12395   NontemporalDeclsSet &DS =
12396       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12397   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12398     for (const Stmt *Ref : C->private_refs()) {
12399       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12400       const ValueDecl *VD;
12401       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12402         VD = DRE->getDecl();
12403       } else {
12404         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12405         assert((ME->isImplicitCXXThis() ||
12406                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12407                "Expected member of current class.");
12408         VD = ME->getMemberDecl();
12409       }
12410       DS.insert(VD);
12411     }
12412   }
12413 }
12414 
12415 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12416   if (!NeedToPush)
12417     return;
12418   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12419 }
12420 
12421 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12422     CodeGenFunction &CGF,
12423     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12424                           std::pair<Address, Address>> &LocalVars)
12425     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12426   if (!NeedToPush)
12427     return;
12428   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12429       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12430   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12431 }
12432 
12433 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12434   if (!NeedToPush)
12435     return;
12436   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12437 }
12438 
12439 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12440   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12441 
12442   return llvm::any_of(
12443       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12444       [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
12445 }
12446 
12447 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12448     const OMPExecutableDirective &S,
12449     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12450     const {
12451   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12452   // Vars in target/task regions must be excluded completely.
12453   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12454       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12455     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12456     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12457     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12458     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12459       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12460         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12461     }
12462   }
12463   // Exclude vars in private clauses.
12464   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12465     for (const Expr *Ref : C->varlists()) {
12466       if (!Ref->getType()->isScalarType())
12467         continue;
12468       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12469       if (!DRE)
12470         continue;
12471       NeedToCheckForLPCs.insert(DRE->getDecl());
12472     }
12473   }
12474   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12475     for (const Expr *Ref : C->varlists()) {
12476       if (!Ref->getType()->isScalarType())
12477         continue;
12478       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12479       if (!DRE)
12480         continue;
12481       NeedToCheckForLPCs.insert(DRE->getDecl());
12482     }
12483   }
12484   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12485     for (const Expr *Ref : C->varlists()) {
12486       if (!Ref->getType()->isScalarType())
12487         continue;
12488       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12489       if (!DRE)
12490         continue;
12491       NeedToCheckForLPCs.insert(DRE->getDecl());
12492     }
12493   }
12494   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12495     for (const Expr *Ref : C->varlists()) {
12496       if (!Ref->getType()->isScalarType())
12497         continue;
12498       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12499       if (!DRE)
12500         continue;
12501       NeedToCheckForLPCs.insert(DRE->getDecl());
12502     }
12503   }
12504   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12505     for (const Expr *Ref : C->varlists()) {
12506       if (!Ref->getType()->isScalarType())
12507         continue;
12508       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12509       if (!DRE)
12510         continue;
12511       NeedToCheckForLPCs.insert(DRE->getDecl());
12512     }
12513   }
12514   for (const Decl *VD : NeedToCheckForLPCs) {
12515     for (const LastprivateConditionalData &Data :
12516          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12517       if (Data.DeclToUniqueName.count(VD) > 0) {
12518         if (!Data.Disabled)
12519           NeedToAddForLPCsAsDisabled.insert(VD);
12520         break;
12521       }
12522     }
12523   }
12524 }
12525 
/// Pushes a lastprivate-conditional tracking entry for directive \p S, with
/// \p IVLVal as the associated loop iteration variable. Activates only for
/// OpenMP 5.0+ directives that carry at least one lastprivate clause with
/// the 'conditional' modifier.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  // Record each conditional-lastprivate variable with a unique "pl_cond"
  // name used later as the basis for its global last-value storage.
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  // Remember the iteration variable and the owning function for later
  // update/comparison emission.
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
12557 
/// Constructor used to disable lastprivate-conditional analysis inside
/// directive \p S: if any tracked variables must not be analyzed there, a
/// "disabled" marker entry listing them is pushed (popped by the
/// destructor).
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  // Lastprivate conditional requires OpenMP 5.0 — nothing to do earlier.
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    // Disabled entries carry no unique names — only the set of decls.
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}
12576 
/// Creates an RAII object that disables lastprivate-conditional analysis for
/// the region of directive \p S (delegates to the two-argument constructor).
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12582 
12583 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12584   if (CGM.getLangOpts().OpenMP < 50)
12585     return;
12586   if (Action == ActionToDo::DisableLastprivateConditional) {
12587     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12588            "Expected list of disabled private vars.");
12589     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12590   }
12591   if (Action == ActionToDo::PushAsLastprivateConditional) {
12592     assert(
12593         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12594         "Expected list of lastprivate conditional vars.");
12595     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12596   }
12597 }
12598 
/// Creates (or reuses) the per-function wrapper record { value, char Fired }
/// for the lastprivate conditional variable \p VD, resets its Fired flag to
/// zero, and returns the address of the value field, which serves as the
/// private copy of \p VD.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  // Per-function cache of (type, value field, fired field, base lvalue).
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use in this function: build the wrapper record and a stack
    // temporary for it. (Record name keeps the historical "lasprivate"
    // spelling; it is part of emitted names, so it stays as is.)
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Reset the Fired flag: no conditional update has happened yet.
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
12633 
12634 namespace {
12635 /// Checks if the lastprivate conditional variable is referenced in LHS.
12636 class LastprivateConditionalRefChecker final
12637     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12638   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12639   const Expr *FoundE = nullptr;
12640   const Decl *FoundD = nullptr;
12641   StringRef UniqueDeclName;
12642   LValue IVLVal;
12643   llvm::Function *FoundFn = nullptr;
12644   SourceLocation Loc;
12645 
12646 public:
12647   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12648     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12649          llvm::reverse(LPM)) {
12650       auto It = D.DeclToUniqueName.find(E->getDecl());
12651       if (It == D.DeclToUniqueName.end())
12652         continue;
12653       if (D.Disabled)
12654         return false;
12655       FoundE = E;
12656       FoundD = E->getDecl()->getCanonicalDecl();
12657       UniqueDeclName = It->second;
12658       IVLVal = D.IVLVal;
12659       FoundFn = D.Fn;
12660       break;
12661     }
12662     return FoundE == E;
12663   }
12664   bool VisitMemberExpr(const MemberExpr *E) {
12665     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12666       return false;
12667     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12668          llvm::reverse(LPM)) {
12669       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12670       if (It == D.DeclToUniqueName.end())
12671         continue;
12672       if (D.Disabled)
12673         return false;
12674       FoundE = E;
12675       FoundD = E->getMemberDecl()->getCanonicalDecl();
12676       UniqueDeclName = It->second;
12677       IVLVal = D.IVLVal;
12678       FoundFn = D.Fn;
12679       break;
12680     }
12681     return FoundE == E;
12682   }
12683   bool VisitStmt(const Stmt *S) {
12684     for (const Stmt *Child : S->children()) {
12685       if (!Child)
12686         continue;
12687       if (const auto *E = dyn_cast<Expr>(Child))
12688         if (!E->isGLValue())
12689           continue;
12690       if (Visit(Child))
12691         return true;
12692     }
12693     return false;
12694   }
12695   explicit LastprivateConditionalRefChecker(
12696       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12697       : LPM(LPM) {}
12698   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12699   getFoundData() const {
12700     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12701   }
12702 };
12703 } // namespace
12704 
12705 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
12706                                                        LValue IVLVal,
12707                                                        StringRef UniqueDeclName,
12708                                                        LValue LVal,
12709                                                        SourceLocation Loc) {
12710   // Last updated loop counter for the lastprivate conditional var.
12711   // int<xx> last_iv = 0;
12712   llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
12713   llvm::Constant *LastIV =
12714       getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
12715   cast<llvm::GlobalVariable>(LastIV)->setAlignment(
12716       IVLVal.getAlignment().getAsAlign());
12717   LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
12718 
12719   // Last value of the lastprivate conditional.
12720   // decltype(priv_a) last_a;
12721   llvm::GlobalVariable *Last = getOrCreateInternalVariable(
12722       CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
12723   Last->setAlignment(LVal.getAlignment().getAsAlign());
12724   LValue LastLVal = CGF.MakeAddrLValue(
12725       Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());
12726 
12727   // Global loop counter. Required to handle inner parallel-for regions.
12728   // iv
12729   llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
12730 
12731   // #pragma omp critical(a)
12732   // if (last_iv <= iv) {
12733   //   last_iv = iv;
12734   //   last_a = priv_a;
12735   // }
12736   auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
12737                     Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
12738     Action.Enter(CGF);
12739     llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
12740     // (last_iv <= iv) ? Check if the variable is updated and store new
12741     // value in global var.
12742     llvm::Value *CmpRes;
12743     if (IVLVal.getType()->isSignedIntegerType()) {
12744       CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
12745     } else {
12746       assert(IVLVal.getType()->isUnsignedIntegerType() &&
12747              "Loop iteration variable must be integer.");
12748       CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
12749     }
12750     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
12751     llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
12752     CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
12753     // {
12754     CGF.EmitBlock(ThenBB);
12755 
12756     //   last_iv = iv;
12757     CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
12758 
12759     //   last_a = priv_a;
12760     switch (CGF.getEvaluationKind(LVal.getType())) {
12761     case TEK_Scalar: {
12762       llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
12763       CGF.EmitStoreOfScalar(PrivVal, LastLVal);
12764       break;
12765     }
12766     case TEK_Complex: {
12767       CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
12768       CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
12769       break;
12770     }
12771     case TEK_Aggregate:
12772       llvm_unreachable(
12773           "Aggregates are not supported in lastprivate conditional.");
12774     }
12775     // }
12776     CGF.EmitBranch(ExitBB);
12777     // There is no need to emit line number for unconditional branch.
12778     (void)ApplyDebugLocation::CreateEmpty(CGF);
12779     CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
12780   };
12781 
12782   if (CGM.getLangOpts().OpenMPSimd) {
12783     // Do not emit as a critical region as no parallel region could be emitted.
12784     RegionCodeGenTy ThenRCG(CodeGen);
12785     ThenRCG(CGF);
12786   } else {
12787     emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
12788   }
12789 }
12790 
12791 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12792                                                          const Expr *LHS) {
12793   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12794     return;
12795   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12796   if (!Checker.Visit(LHS))
12797     return;
12798   const Expr *FoundE;
12799   const Decl *FoundD;
12800   StringRef UniqueDeclName;
12801   LValue IVLVal;
12802   llvm::Function *FoundFn;
12803   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12804       Checker.getFoundData();
12805   if (FoundFn != CGF.CurFn) {
12806     // Special codegen for inner parallel regions.
12807     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12808     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12809     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12810            "Lastprivate conditional is not found in outer region.");
12811     QualType StructTy = std::get<0>(It->getSecond());
12812     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12813     LValue PrivLVal = CGF.EmitLValue(FoundE);
12814     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12815         PrivLVal.getAddress(CGF),
12816         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
12817         CGF.ConvertTypeForMem(StructTy));
12818     LValue BaseLVal =
12819         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12820     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12821     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12822                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12823                         FiredLVal, llvm::AtomicOrdering::Unordered,
12824                         /*IsVolatile=*/true, /*isInit=*/false);
12825     return;
12826   }
12827 
12828   // Private address of the lastprivate conditional in the current context.
12829   // priv_a
12830   LValue LVal = CGF.EmitLValue(FoundE);
12831   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12832                                    FoundE->getExprLoc());
12833 }
12834 
12835 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12836     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12837     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12838   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12839     return;
12840   auto Range = llvm::reverse(LastprivateConditionalStack);
12841   auto It = llvm::find_if(
12842       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12843   if (It == Range.end() || It->Fn != CGF.CurFn)
12844     return;
12845   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12846   assert(LPCI != LastprivateConditionalToTypes.end() &&
12847          "Lastprivates must be registered already.");
12848   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12849   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12850   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12851   for (const auto &Pair : It->DeclToUniqueName) {
12852     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12853     if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
12854       continue;
12855     auto I = LPCI->getSecond().find(Pair.first);
12856     assert(I != LPCI->getSecond().end() &&
12857            "Lastprivate must be rehistered already.");
12858     // bool Cmp = priv_a.Fired != 0;
12859     LValue BaseLVal = std::get<3>(I->getSecond());
12860     LValue FiredLVal =
12861         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12862     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12863     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12864     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12865     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12866     // if (Cmp) {
12867     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12868     CGF.EmitBlock(ThenBB);
12869     Address Addr = CGF.GetAddrOfLocalVar(VD);
12870     LValue LVal;
12871     if (VD->getType()->isReferenceType())
12872       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12873                                            AlignmentSource::Decl);
12874     else
12875       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12876                                 AlignmentSource::Decl);
12877     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12878                                      D.getBeginLoc());
12879     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12880     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12881     // }
12882   }
12883 }
12884 
12885 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12886     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12887     SourceLocation Loc) {
12888   if (CGF.getLangOpts().OpenMP < 50)
12889     return;
12890   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12891   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12892          "Unknown lastprivate conditional variable.");
12893   StringRef UniqueName = It->second;
12894   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12895   // The variable was not updated in the region - exit.
12896   if (!GV)
12897     return;
12898   LValue LPLVal = CGF.MakeAddrLValue(
12899       Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
12900       PrivLVal.getType().getNonReferenceType());
12901   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12902   CGF.EmitStoreOfScalar(Res, PrivLVal);
12903 }
12904 
// SIMD-only mode has no OpenMP runtime, so outlining a 'parallel' region must
// never be requested.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12910 
// Outlining a 'teams' region is never requested in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12916 
// Task outlining requires the full runtime; never reached in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12924 
// No runtime fork call can be emitted in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12933 
// 'critical' needs runtime locking, unavailable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12940 
// 'master' regions are never emitted through the runtime in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12946 
// 'masked' regions are never emitted through the runtime in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12953 
// 'taskyield' requires the tasking runtime; never reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12958 
// 'taskgroup' requires the tasking runtime; never reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12964 
// 'single' (and its copyprivate support) is never emitted in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12972 
// Runtime-backed 'ordered' regions are never emitted in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12979 
// Barriers require the runtime; never reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12987 
// Dynamic loop scheduling is runtime-driven; never reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12994 
// Static worksharing init is runtime-driven; never reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13000 
// 'distribute' scheduling is runtime-driven; never reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13006 
// Ordered-iteration bookkeeping is runtime-driven; never reached in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13013 
// Static worksharing finalization is runtime-driven; never reached in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13019 
// Dynamic chunk fetching is runtime-driven; never reached in SIMD-only mode.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13027 
// num_threads has no meaning without the runtime; never reached in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13033 
// proc_bind has no meaning without the runtime; never reached in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13039 
// Threadprivate storage needs runtime support; never reached in SIMD-only
// mode.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13046 
// Threadprivate definitions need runtime support; never reached in SIMD-only
// mode.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13052 
// Artificial threadprivates need runtime support; never reached in SIMD-only
// mode.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13057 
// 'flush' is a runtime call; never reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13064 
// Task creation requires the tasking runtime; never reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13073 
// 'taskloop' requires the tasking runtime; never reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13080 
// In SIMD-only mode only the "simple" (no runtime calls) reduction path is
// valid; delegate to the base implementation, which handles that case.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
13089 
// Task reductions require the runtime; never reached in SIMD-only mode.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13095 
// Task reductions require the runtime; never reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13101 
// Task reductions require the runtime; never reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13108 
// Task reductions require the runtime; never reached in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13115 
// 'taskwait' requires the tasking runtime; never reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13121 
// Cancellation requires the runtime; never reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13127 
// Cancellation requires the runtime; never reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13133 
// Target offloading is unavailable in SIMD-only mode; never reached.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13140 
// Target offloading is unavailable in SIMD-only mode; never reached.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13150 
// Target offloading is unavailable in SIMD-only mode; never reached.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13154 
// Target offloading is unavailable in SIMD-only mode; never reached.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13158 
// Nothing is offloaded in SIMD-only mode, so no global is claimed for target
// codegen here.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
13162 
// 'teams' requires the runtime; never reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13170 
// num_teams/thread_limit have no meaning without the runtime; never reached in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13177 
// Target data mapping is unavailable in SIMD-only mode; never reached.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13183 
// Stand-alone target data directives are unavailable in SIMD-only mode; never
// reached.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13189 
// Doacross dependences require the runtime; never reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13195 
// Doacross dependences require the runtime; never reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13200 
// Parameter translation is only needed for device outlining; never reached in
// SIMD-only mode.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13206 
// Parameter translation is only needed for device outlining; never reached in
// SIMD-only mode.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13213