1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/APValue.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/AST/Decl.h"
21 #include "clang/AST/OpenMPClause.h"
22 #include "clang/AST/StmtOpenMP.h"
23 #include "clang/AST/StmtVisitor.h"
24 #include "clang/Basic/BitmaskEnum.h"
25 #include "clang/Basic/FileManager.h"
26 #include "clang/Basic/OpenMPKinds.h"
27 #include "clang/Basic/SourceManager.h"
28 #include "clang/CodeGen/ConstantInitBuilder.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/SetOperations.h"
31 #include "llvm/ADT/StringExtras.h"
32 #include "llvm/Bitcode/BitcodeReader.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/IR/Value.h"
37 #include "llvm/Support/AtomicOrdering.h"
38 #include "llvm/Support/Format.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include <cassert>
41 #include <numeric>
42 
43 using namespace clang;
44 using namespace CodeGen;
45 using namespace llvm::omp;
46 
47 namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Build region info backed by a captured statement \p CS (used for
  /// outlined regions).
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Build region info with no captured statement of its own (used for
  /// inlined regions, which reuse the enclosing region's captures).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit the task-switching point for untied tasks. No-op by default;
  /// overridden by task-region info classes.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Kind of this region (outlined 'parallel'/'task', inlined, or target).
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// OpenMP directive this region was created for.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// Whether a 'cancel' directive may appear inside this region.
  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
109 
110 /// API for captured statement code generation in OpenMP constructs.
111 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
112 public:
113   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
114                              const RegionCodeGenTy &CodeGen,
115                              OpenMPDirectiveKind Kind, bool HasCancel,
116                              StringRef HelperName)
117       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
118                            HasCancel),
119         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
120     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
121   }
122 
123   /// Get a variable or parameter for storing global thread id
124   /// inside OpenMP construct.
125   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
126 
127   /// Get the name of the capture helper.
128   StringRef getHelperName() const override { return HelperName; }
129 
130   static bool classof(const CGCapturedStmtInfo *Info) {
131     return CGOpenMPRegionInfo::classof(Info) &&
132            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
133                ParallelOutlinedRegion;
134   }
135 
136 private:
137   /// A variable or parameter storing global thread id for OpenMP
138   /// constructs.
139   const VarDecl *ThreadIDVar;
140   StringRef HelperName;
141 };
142 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that builds the dispatch machinery for 'untied'
  /// tasks: a switch over the task part id that jumps to the point where
  /// the task was previously suspended.
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // Dispatch on the part id; unmatched values branch to the done
        // block, which returns from the task entry.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Part id 0 resumes at the beginning of the task body.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one suspension point: store the next part id, run the
    /// client-provided UntiedCodeGen, return from the task entry, and
    /// register the continuation block as a new case of the switch.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // Record the part id at which this task must resume; it equals the
        // case number that is added below for the continuation block.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (== number of switch cases).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Forward the suspension-point emission to the shared untied action.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
231 
232 /// API for inlined captured statement code generation in OpenMP
233 /// constructs.
234 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
235 public:
236   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
237                             const RegionCodeGenTy &CodeGen,
238                             OpenMPDirectiveKind Kind, bool HasCancel)
239       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
240         OldCSI(OldCSI),
241         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
242 
243   // Retrieve the value of the context parameter.
244   llvm::Value *getContextValue() const override {
245     if (OuterRegionInfo)
246       return OuterRegionInfo->getContextValue();
247     llvm_unreachable("No context value for inlined OpenMP region");
248   }
249 
250   void setContextValue(llvm::Value *V) override {
251     if (OuterRegionInfo) {
252       OuterRegionInfo->setContextValue(V);
253       return;
254     }
255     llvm_unreachable("No context value for inlined OpenMP region");
256   }
257 
258   /// Lookup the captured field decl for a variable.
259   const FieldDecl *lookup(const VarDecl *VD) const override {
260     if (OuterRegionInfo)
261       return OuterRegionInfo->lookup(VD);
262     // If there is no outer outlined region,no need to lookup in a list of
263     // captured variables, we can use the original one.
264     return nullptr;
265   }
266 
267   FieldDecl *getThisFieldDecl() const override {
268     if (OuterRegionInfo)
269       return OuterRegionInfo->getThisFieldDecl();
270     return nullptr;
271   }
272 
273   /// Get a variable or parameter for storing global thread id
274   /// inside OpenMP construct.
275   const VarDecl *getThreadIDVariable() const override {
276     if (OuterRegionInfo)
277       return OuterRegionInfo->getThreadIDVariable();
278     return nullptr;
279   }
280 
281   /// Get an LValue for the current ThreadID variable.
282   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
283     if (OuterRegionInfo)
284       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
285     llvm_unreachable("No LValue for inlined OpenMP construct");
286   }
287 
288   /// Get the name of the capture helper.
289   StringRef getHelperName() const override {
290     if (auto *OuterRegionInfo = getOldCSI())
291       return OuterRegionInfo->getHelperName();
292     llvm_unreachable("No helper name for inlined OpenMP construct");
293   }
294 
295   void emitUntiedSwitch(CodeGenFunction &CGF) override {
296     if (OuterRegionInfo)
297       OuterRegionInfo->emitUntiedSwitch(CGF);
298   }
299 
300   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
301 
302   static bool classof(const CGCapturedStmtInfo *Info) {
303     return CGOpenMPRegionInfo::classof(Info) &&
304            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
305   }
306 
307   ~CGOpenMPInlinedRegionInfo() override = default;
308 
309 private:
310   /// CodeGen info about outer OpenMP region.
311   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
312   CGOpenMPRegionInfo *OuterRegionInfo;
313 };
314 
315 /// API for captured statement code generation in OpenMP target
316 /// constructs. For this captures, implicit parameters are used instead of the
317 /// captured fields. The name of the target region has to be unique in a given
318 /// application so it is provided by the client, because only the client has
319 /// the information to generate that.
320 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
321 public:
322   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
323                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
324       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
325                            /*HasCancel=*/false),
326         HelperName(HelperName) {}
327 
328   /// This is unused for target regions because each starts executing
329   /// with a single thread.
330   const VarDecl *getThreadIDVariable() const override { return nullptr; }
331 
332   /// Get the name of the capture helper.
333   StringRef getHelperName() const override { return HelperName; }
334 
335   static bool classof(const CGCapturedStmtInfo *Info) {
336     return CGOpenMPRegionInfo::classof(Info) &&
337            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
338   }
339 
340 private:
341   StringRef HelperName;
342 };
343 
/// Placeholder codegen callback for regions that must never emit a body
/// (used by CGOpenMPInnerExprInfo); reaching it is a logic error.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in a innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      // Locals and parameters are already usable as-is; only non-local
      // (e.g. global) captures need privatization.
      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      // NOTE(review): the lambda captures the loop-local DRE by reference,
      // which assumes addPrivate evaluates the generator eagerly — confirm
      // against OMPPrivateScope::addPrivate before restructuring.
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
406 
/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  // Saved copies of the CGF state that is cleared for the duration of the
  // inlined region when NoInheritance is set; restored in the destructor.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  /// \param NoInheritance If true, lambda-capture and block state of \p CGF
  /// is stashed away so the inlined region does not see it.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
449 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same bit as
  /// OMP_IDENT_BARRIER_IMPL, per kmp.h).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
478 
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Reserved device ids passed to the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
504 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
545 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
577 
578 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
579 /// region.
580 class CleanupTy final : public EHScopeStack::Cleanup {
581   PrePostActionTy *Action;
582 
583 public:
584   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
585   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
586     if (!CGF.HaveInsertPoint())
587       return;
588     Action->Exit(CGF);
589   }
590 };
591 
592 } // anonymous namespace
593 
594 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
595   CodeGenFunction::RunCleanupsScope Scope(CGF);
596   if (PrePostAction) {
597     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
598     Callback(CodeGen, CGF, *PrePostAction);
599   } else {
600     PrePostActionTy Action;
601     Callback(CodeGen, CGF, Action);
602   }
603 }
604 
605 /// Check if the combiner is a call to UDR combiner and if it is so return the
606 /// UDR decl used for reduction.
607 static const OMPDeclareReductionDecl *
608 getReductionInit(const Expr *ReductionOp) {
609   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
610     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
611       if (const auto *DRE =
612               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
613         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
614           return DRD;
615   return nullptr;
616 }
617 
/// Emit initialization of a private reduction copy, using either the
/// user-defined reduction (UDR) initializer or the null value of the type.
/// \param DRD User-defined reduction declaration for the item.
/// \param InitOp UDR initializer expression — a call through an opaque
///        value — used when \p DRD has an explicit initializer.
/// \param Private Address of the private (destination) copy.
/// \param Original Address of the original (source) variable.
/// \param Ty Type of the reduction item.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // Explicit 'initializer' clause: call the generated initializer
    // function, binding the call's first argument to the private copy and
    // the second to the original variable.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Substitute the opaque callee with the second function of the UDR
    // pair and emit the call for its side effects.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No explicit initializer: copy from a private zero-initialized global
    // constant of the same type.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      // Aggregates are copied through an lvalue opaque expression and we
      // are done; no scalar rvalue is produced.
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    // Scalar/complex: store the loaded rvalue into the private copy.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
673 
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, initialize each element via the
///        UDR initializer in \p Init; otherwise emit \p Init directly.
/// \param Init Initial expression of array.
/// \param DRD User-defined reduction declaration, or null; when non-null,
///        the source array is walked in lockstep with the destination.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current element of each array across loop iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the IR label reuses "dest.element" for the source GEP;
    // cosmetic only — the pointer advanced here is the source PHI.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
765 
766 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
767   return CGF.EmitOMPSharedLValue(E);
768 }
769 
770 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
771                                             const Expr *E) {
772   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
773     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
774   return LValue();
775 }
776 
/// Emit the initialization of a private aggregate (array) reduction copy.
/// \param N Index of the reduction item in ClausesData.
/// \param PrivateAddr Address of the private copy to initialize.
/// \param SharedLVal Lvalue of the original (shared) variable.
/// \param DRD User-defined reduction declaration, or null for builtin
///        reduction operations.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Use the declare-reduction initializer when one is declared, or when the
  // private copy has no initializer of its own; otherwise fall back to the
  // private variable's default initializer.
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress(CGF));
}
793 
794 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
795                                    ArrayRef<const Expr *> Origs,
796                                    ArrayRef<const Expr *> Privates,
797                                    ArrayRef<const Expr *> ReductionOps) {
798   ClausesData.reserve(Shareds.size());
799   SharedAddresses.reserve(Shareds.size());
800   Sizes.reserve(Shareds.size());
801   BaseDecls.reserve(Shareds.size());
802   const auto *IOrig = Origs.begin();
803   const auto *IPriv = Privates.begin();
804   const auto *IRed = ReductionOps.begin();
805   for (const Expr *Ref : Shareds) {
806     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
807     std::advance(IOrig, 1);
808     std::advance(IPriv, 1);
809     std::advance(IRed, 1);
810   }
811 }
812 
813 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
814   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
815          "Number of generated lvalues must be exactly N.");
816   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
817   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
818   SharedAddresses.emplace_back(First, Second);
819   if (ClausesData[N].Shared == ClausesData[N].Ref) {
820     OrigAddresses.emplace_back(First, Second);
821   } else {
822     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
823     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
824     OrigAddresses.emplace_back(First, Second);
825   }
826 }
827 
/// Compute and cache the size (Sizes[N]) of reduction item \p N and, for
/// variably-modified types, emit the VLA type so its size expressions are
/// available in the current function.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-size item: size in chars is known from the type; no separate
    // element count is required.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Array section: element count is (UB - LB) + 1 since both bounds are
    // addresses of elements within the section.
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole variably-modified type: size in chars comes from the type;
    // derive the element count by dividing out the element size.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed element count while
  // emitting the variably-modified private type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
864 
/// Re-emit the variably-modified private type of reduction item \p N using
/// an externally supplied element count \p Size (used when the type must be
/// materialized in a different function than the one where the size was
/// originally computed). For non-variably-modified types \p Size must be
/// null and nothing is emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind the VLA size expression to \p Size while emitting the type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
883 
/// Emit the initialization of the private copy for reduction item \p N.
///
/// Strategy:
///  - array types: delegate to emitAggregateInitialization;
///  - user-defined reductions (with an initializer clause, or when the
///    private copy has no initializer of its own): emit via
///    emitInitWithReductionInitializer after running \p DefaultInit;
///  - otherwise: run \p DefaultInit and, if it reports the item unhandled,
///    emit the private variable's own non-trivial initializer.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Cast both addresses to the memory representation of their respective
  // types before emitting any initializer.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
917 
918 bool ReductionCodeGen::needCleanups(unsigned N) {
919   const auto *PrivateVD =
920       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
921   QualType PrivateType = PrivateVD->getType();
922   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
923   return DTorKind != QualType::DK_none;
924 }
925 
926 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
927                                     Address PrivateAddr) {
928   const auto *PrivateVD =
929       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
930   QualType PrivateType = PrivateVD->getType();
931   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
932   if (needCleanups(N)) {
933     PrivateAddr = CGF.Builder.CreateElementBitCast(
934         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
935     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
936   }
937 }
938 
/// Walk through the pointer/reference indirections of \p BaseTy, loading
/// through each level, until the type matches \p ElTy (or no indirection is
/// left). Returns an lvalue for the final address, element-bitcast to
/// \p ElTy's memory representation, preserving the base/TBAA info.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // Load through one level of pointer or reference indirection.
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
958 
/// Rebuild the chain of pointer temporaries needed to present \p Addr as a
/// value of (possibly multi-level pointer/reference) type \p BaseTy whose
/// innermost pointee matches \p ElTy. \p Addr is cast and stored into the
/// innermost temporary; the outermost temporary is returned. When no
/// indirection is required, \p Addr itself is returned with
/// \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  // Tmp: current (innermost so far) temporary; TopTmp: the previous one,
  // which should point at Tmp; MostTopTmp: the outermost temporary.
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the casted address into the innermost temporary and hand back
    // the outermost one.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
986 
987 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
988   const VarDecl *OrigVD = nullptr;
989   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
990     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
991     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
992       Base = TempOASE->getBase()->IgnoreParenImpCasts();
993     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
994       Base = TempASE->getBase()->IgnoreParenImpCasts();
995     DE = cast<DeclRefExpr>(Base);
996     OrigVD = cast<VarDecl>(DE->getDecl());
997   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
998     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
999     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1000       Base = TempASE->getBase()->IgnoreParenImpCasts();
1001     DE = cast<DeclRefExpr>(Base);
1002     OrigVD = cast<VarDecl>(DE->getDecl());
1003   }
1004   return OrigVD;
1005 }
1006 
/// Adjust \p PrivateAddr of reduction item \p N so it addresses the same
/// offset within the private copy as the shared lvalue does within its base
/// variable. For array-section/subscript references this applies the
/// (base - shared) pointer delta to the private address and rebuilds any
/// pointer indirections via castToBase; otherwise the address is returned
/// unchanged. Also records the base declaration in BaseDecls.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    // Load through indirections so BaseLValue addresses the same element
    // type as the shared lvalue.
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    // Offset (in elements) of the base relative to the shared address.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1033 
1034 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1035   const OMPDeclareReductionDecl *DRD =
1036       getReductionInit(ClausesData[N].ReductionOp);
1037   return DRD && DRD->getInitializer();
1038 }
1039 
1040 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1041   return CGF.EmitLoadOfPointerLValue(
1042       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1043       getThreadIDVariable()->getType()->castAs<PointerType>());
1044 }
1045 
/// Emit the body of an OpenMP region inside a terminate scope, so that
/// exceptions escaping the structured block terminate the program rather
/// than unwinding out of the region. Increments the profile counter for
/// \p S when one is provided.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1060 
1061 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1062     CodeGenFunction &CGF) {
1063   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1064                             getThreadIDVariable()->getType(),
1065                             AlignmentSource::Decl);
1066 }
1067 
1068 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1069                                        QualType FieldTy) {
1070   auto *Field = FieldDecl::Create(
1071       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1072       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1073       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1074   Field->setAccess(AS_public);
1075   DC->addDecl(Field);
1076   return Field;
1077 }
1078 
/// Construct the OpenMP runtime support object. \p FirstSeparator and
/// \p Separator configure how runtime-generated names are joined (see
/// getName). Also initializes the OpenMPIRBuilder and loads any offload
/// metadata recorded for the module.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // Type of the kmp_critical_name lock array: [8 x i32].
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}
1089 
1090 void CGOpenMPRuntime::clear() {
1091   InternalVars.clear();
1092   // Clean non-target variable declarations possibly used only in debug info.
1093   for (const auto &Data : EmittedNonTargetVariables) {
1094     if (!Data.getValue().pointsToAliveValue())
1095       continue;
1096     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1097     if (!GV)
1098       continue;
1099     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1100       continue;
1101     GV->eraseFromParent();
1102   }
1103 }
1104 
1105 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1106   SmallString<128> Buffer;
1107   llvm::raw_svector_ostream OS(Buffer);
1108   StringRef Sep = FirstSeparator;
1109   for (StringRef Part : Parts) {
1110     OS << Sep << Part;
1111     Sep = Separator;
1112   }
1113   return std::string(OS.str());
1114 }
1115 
/// Emit either the combiner or the initializer function for an
/// 'omp declare reduction' declaration.
///
/// The generated function has internal linkage and the signature
///   void .omp_combiner.(Ty *omp_out_parm, Ty *omp_in_parm)
/// (resp. ".omp_initializer."), with the declaration's omp_in/omp_out (or
/// omp_orig/omp_priv) variables privatized to the dereferenced parameters.
///
/// \param CombinerInitializer the combiner expression; for initializers,
///        the init expression for call-style initializers or null when the
///        initialization is carried by \p Out's own initializer.
/// \param In, Out the in/out (orig/priv) variable declarations.
/// \param IsCombiner true when emitting the combiner, false for the
///        initializer.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // When optimizing, make the helper eligible for (and prefer) inlining:
    // strip the noinline/optnone attributes the default internal-function
    // setup may have added.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // Direct-init initializers: emit Out's own non-trivial initializer into
  // the privatized omp_priv storage.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1172 
/// Emit (once) the combiner and optional initializer functions for the
/// given 'omp declare reduction' declaration and cache them in UDRMap.
/// When \p CGF is provided, the declaration is also recorded against the
/// current function in FunctionUDRMap.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  // Already emitted for this declaration.
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only call-style initializers pass the init expression; direct-init is
    // emitted through the priv variable's own initializer instead.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1198 
1199 std::pair<llvm::Function *, llvm::Function *>
1200 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1201   auto I = UDRMap.find(D);
1202   if (I != UDRMap.end())
1203     return I->second;
1204   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1205   return UDRMap.lookup(D);
1206 }
1207 
1208 namespace {
1209 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1210 // Builder if one is present.
struct PushAndPopStackRAII {
  /// Push a finalization callback for the region onto the OpenMPIRBuilder's
  /// finalization stack (popped again by the destructor). No-op when no
  /// builder is provided.
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      // On cancellation, branch through any pending cleanups to the
      // parallel region's cancel destination from the given insert point.
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  // Non-owning; null means the RAII is inert.
  llvm::OpenMPIRBuilder *OMPBuilder;
};
1251 } // namespace
1252 
/// Create the outlined function for a 'parallel' or 'teams' region by
/// emitting the captured statement \p CS. \p ThreadIDVar is the kmp_int32*
/// parameter through which the thread id is passed; \p OutlinedHelperName
/// is used when naming the generated function.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Determine whether the directive (for each kind that can contain a
  // parallel cancellation point) has a 'cancel' construct.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}
1289 
1290 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1291     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1292     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1293   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1294   return emitParallelOrTeamsOutlinedFunction(
1295       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1296 }
1297 
1298 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1299     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1300     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1301   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1302   return emitParallelOrTeamsOutlinedFunction(
1303       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1304 }
1305 
/// Create the outlined function for a 'task' or 'taskloop' region. Unlike
/// parallel regions, tasks receive the thread id by value (kmp_int32, not a
/// pointer — see the assert below). For untied tasks, \p NumberOfParts is
/// set to the number of task parts generated.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // For untied tasks, re-enqueue the task by calling __kmpc_omp_task with
  // the task descriptor loaded from TaskTVar.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Determine whether the task-like directive contains a 'cancel' construct.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1352 
1353 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1354                              const RecordDecl *RD, const CGRecordLayout &RL,
1355                              ArrayRef<llvm::Constant *> Data) {
1356   llvm::StructType *StructTy = RL.getLLVMType();
1357   unsigned PrevIdx = 0;
1358   ConstantInitBuilder CIBuilder(CGM);
1359   auto DI = Data.begin();
1360   for (const FieldDecl *FD : RD->fields()) {
1361     unsigned Idx = RL.getLLVMFieldNo(FD);
1362     // Fill the alignment.
1363     for (unsigned I = PrevIdx; I < Idx; ++I)
1364       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1365     PrevIdx = Idx + 1;
1366     Fields.add(*DI);
1367     ++DI;
1368   }
1369 }
1370 
1371 template <class... As>
1372 static llvm::GlobalVariable *
1373 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1374                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1375                    As &&... Args) {
1376   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1377   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1378   ConstantInitBuilder CIBuilder(CGM);
1379   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1380   buildStructValue(Fields, CGM, RD, RL, Data);
1381   return Fields.finishAndCreateGlobal(
1382       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1383       std::forward<As>(Args)...);
1384 }
1385 
1386 template <typename T>
1387 static void
1388 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1389                                          ArrayRef<llvm::Constant *> Data,
1390                                          T &Parent) {
1391   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1392   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1393   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1394   buildStructValue(Fields, CGM, RD, RL, Data);
1395   Fields.finishAndAddTo(Parent);
1396 }
1397 
/// Create the service-instruction insertion point for the current function:
/// a dummy no-op instruction (a bitcast of undef) before which lazily
/// generated runtime values can be inserted later. It is placed either at
/// the builder's current position (\p AtCurrentPoint) or right after the
/// function's alloca insertion point.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}
1413 
1414 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1415   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1416   if (Elem.second.ServiceInsertPt) {
1417     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1418     Elem.second.ServiceInsertPt = nullptr;
1419     Ptr->eraseFromParent();
1420   }
1421 }
1422 
1423 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1424                                                   SourceLocation Loc,
1425                                                   SmallString<128> &Buffer) {
1426   llvm::raw_svector_ostream OS(Buffer);
1427   // Build debug location
1428   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1429   OS << ";" << PLoc.getFilename() << ";";
1430   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1431     OS << FD->getQualifiedNameAsString();
1432   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1433   return OS.str();
1434 }
1435 
1436 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1437                                                  SourceLocation Loc,
1438                                                  unsigned Flags) {
1439   llvm::Constant *SrcLocStr;
1440   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1441       Loc.isInvalid()) {
1442     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
1443   } else {
1444     std::string FunctionName = "";
1445     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1446       FunctionName = FD->getQualifiedNameAsString();
1447     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1448     const char *FileName = PLoc.getFilename();
1449     unsigned Line = PLoc.getLine();
1450     unsigned Column = PLoc.getColumn();
1451     SrcLocStr =
1452         OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line, Column);
1453   }
1454   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1455   return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
1456                                      Reserved2Flags);
1457 }
1458 
// Return the OpenMP thread id for the current function, caching it per
// function where possible. The value comes either from an outlined region's
// thread-id parameter or from a __kmpc_global_thread_num call emitted at the
// service insertion point.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only reuse the parameter when it is safe w.r.t. exception handling:
      // either no landing pad is required, or the load happens in (or from)
      // the entry block / the current block, so it cannot be skipped by EH
      // control flow.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the runtime call at the service insertion point (entry block) so the
  // cached value dominates all uses; the guard restores the insertion point.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1526 
1527 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1528   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1529   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1530     clearLocThreadIdInsertPt(CGF);
1531     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1532   }
1533   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1534     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1535       UDRMap.erase(D);
1536     FunctionUDRMap.erase(CGF.CurFn);
1537   }
1538   auto I = FunctionUDMMap.find(CGF.CurFn);
1539   if (I != FunctionUDMMap.end()) {
1540     for(const auto *D : I->second)
1541       UDMMap.erase(D);
1542     FunctionUDMMap.erase(I);
1543   }
1544   LastprivateConditionalToTypes.erase(CGF.CurFn);
1545   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1546 }
1547 
// Return the ident_t* type as maintained by the OpenMPIRBuilder.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}
1551 
1552 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1553   if (!Kmpc_MicroTy) {
1554     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1555     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1556                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1557     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1558   }
1559   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1560 }
1561 
1562 llvm::FunctionCallee
1563 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1564                                              bool IsGPUDistribute) {
1565   assert((IVSize == 32 || IVSize == 64) &&
1566          "IV size is not compatible with the omp runtime");
1567   StringRef Name;
1568   if (IsGPUDistribute)
1569     Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1570                                     : "__kmpc_distribute_static_init_4u")
1571                         : (IVSigned ? "__kmpc_distribute_static_init_8"
1572                                     : "__kmpc_distribute_static_init_8u");
1573   else
1574     Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1575                                     : "__kmpc_for_static_init_4u")
1576                         : (IVSigned ? "__kmpc_for_static_init_8"
1577                                     : "__kmpc_for_static_init_8u");
1578 
1579   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1580   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1581   llvm::Type *TypeParams[] = {
1582     getIdentTyPointerTy(),                     // loc
1583     CGM.Int32Ty,                               // tid
1584     CGM.Int32Ty,                               // schedtype
1585     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1586     PtrTy,                                     // p_lower
1587     PtrTy,                                     // p_upper
1588     PtrTy,                                     // p_stride
1589     ITy,                                       // incr
1590     ITy                                        // chunk
1591   };
1592   auto *FnTy =
1593       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1594   return CGM.CreateRuntimeFunction(FnTy, Name);
1595 }
1596 
1597 llvm::FunctionCallee
1598 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1599   assert((IVSize == 32 || IVSize == 64) &&
1600          "IV size is not compatible with the omp runtime");
1601   StringRef Name =
1602       IVSize == 32
1603           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1604           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1605   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1606   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1607                                CGM.Int32Ty,           // tid
1608                                CGM.Int32Ty,           // schedtype
1609                                ITy,                   // lower
1610                                ITy,                   // upper
1611                                ITy,                   // stride
1612                                ITy                    // chunk
1613   };
1614   auto *FnTy =
1615       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1616   return CGM.CreateRuntimeFunction(FnTy, Name);
1617 }
1618 
1619 llvm::FunctionCallee
1620 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1621   assert((IVSize == 32 || IVSize == 64) &&
1622          "IV size is not compatible with the omp runtime");
1623   StringRef Name =
1624       IVSize == 32
1625           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1626           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1627   llvm::Type *TypeParams[] = {
1628       getIdentTyPointerTy(), // loc
1629       CGM.Int32Ty,           // tid
1630   };
1631   auto *FnTy =
1632       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1633   return CGM.CreateRuntimeFunction(FnTy, Name);
1634 }
1635 
1636 llvm::FunctionCallee
1637 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1638   assert((IVSize == 32 || IVSize == 64) &&
1639          "IV size is not compatible with the omp runtime");
1640   StringRef Name =
1641       IVSize == 32
1642           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1643           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1644   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1645   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1646   llvm::Type *TypeParams[] = {
1647     getIdentTyPointerTy(),                     // loc
1648     CGM.Int32Ty,                               // tid
1649     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1650     PtrTy,                                     // p_lower
1651     PtrTy,                                     // p_upper
1652     PtrTy                                      // p_stride
1653   };
1654   auto *FnTy =
1655       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1656   return CGM.CreateRuntimeFunction(FnTy, Name);
1657 }
1658 
1659 /// Obtain information that uniquely identifies a target entry. This
1660 /// consists of the file and device IDs as well as line number associated with
1661 /// the relevant entry source location.
1662 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1663                                      unsigned &DeviceID, unsigned &FileID,
1664                                      unsigned &LineNum) {
1665   SourceManager &SM = C.getSourceManager();
1666 
1667   // The loc should be always valid and have a file ID (the user cannot use
1668   // #pragma directives in macros)
1669 
1670   assert(Loc.isValid() && "Source location is expected to be always valid.");
1671 
1672   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1673   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1674 
1675   llvm::sys::fs::UniqueID ID;
1676   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1677     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1678     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1679     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1680       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1681           << PLoc.getFilename() << EC.message();
1682   }
1683 
1684   DeviceID = ID.getDevice();
1685   FileID = ID.getFile();
1686   LineNum = PLoc.getLine();
1687 }
1688 
// Return the address of the reference pointer ("_decl_tgt_ref_ptr") used to
// access a declare-target variable that is mapped via 'link' (or via 'to'
// under unified shared memory); returns an invalid Address otherwise.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // Under -fopenmp-simd no offloading machinery is generated.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        // Internal-linkage variables get the file ID mixed into the name so
        // distinct TUs do not collide on the same pointer name.
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      // Create the pointer lazily on first use and register it so the
      // offloading tables know about this variable.
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host the pointer is initialized with the variable's address;
      // on the device the runtime fills it in.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1727 
1728 llvm::Constant *
1729 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1730   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1731          !CGM.getContext().getTargetInfo().isTLSSupported());
1732   // Lookup the entry, lazily creating it if necessary.
1733   std::string Suffix = getName({"cache", ""});
1734   return getOrCreateInternalVariable(
1735       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1736 }
1737 
1738 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1739                                                 const VarDecl *VD,
1740                                                 Address VDAddr,
1741                                                 SourceLocation Loc) {
1742   if (CGM.getLangOpts().OpenMPUseTLS &&
1743       CGM.getContext().getTargetInfo().isTLSSupported())
1744     return VDAddr;
1745 
1746   llvm::Type *VarTy = VDAddr.getElementType();
1747   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1748                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1749                                                        CGM.Int8PtrTy),
1750                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1751                          getOrCreateThreadPrivateCache(VD)};
1752   return Address(CGF.EmitRuntimeCall(
1753                      OMPBuilder.getOrCreateRuntimeFunction(
1754                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1755                      Args),
1756                  VDAddr.getAlignment());
1757 }
1758 
1759 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1760     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1761     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1762   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1763   // library.
1764   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1765   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1766                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1767                       OMPLoc);
1768   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1769   // to register constructor/destructor for variable.
1770   llvm::Value *Args[] = {
1771       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1772       Ctor, CopyCtor, Dtor};
1773   CGF.EmitRuntimeCall(
1774       OMPBuilder.getOrCreateRuntimeFunction(
1775           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1776       Args);
1777 }
1778 
// Emit the ctor/dtor registration machinery for a threadprivate variable
// definition. Returns the synthesized module-level init function when no CGF
// is supplied, or nullptr when registration is emitted inline (or not needed).
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Native TLS handles everything; no runtime registration required.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Emit registration only once per mangled name.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // The ctor has signature void *(void *): it receives the address of the
      // thread-local copy and returns it.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      // Run the variable's initializer on the thread-local storage.
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Return the same pointer that was passed in.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // The dtor has signature void (void *): it receives the address of the
      // thread-local copy to destroy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      // No ctor needed: pass a typed null so the register call is well-formed.
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      // No dtor needed: likewise pass a typed null.
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No CodeGenFunction context: synthesize a standalone init function that
      // performs the registration and return it to the caller for scheduling.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Inline registration into the caller-provided function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1898 
// Emit ctor/dtor offload entries for a declare-target variable definition.
// Returns true when the variable's own initializer must be suppressed
// (i.e. when compiling for the device).
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do when no offloading targets are configured at all.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // 'link' variables (and 'to' under unified shared memory) are accessed via
  // the reference pointer instead; no ctor/dtor entries here.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit the entries only once per mangled name.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      // Run the variable's initializer directly on the device global.
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive; only the offload tables reference it.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host side only a uniquely-named placeholder is needed so the
      // entry in the offload table can be matched with the device's.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the dtor alive; only the offload tables reference it.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host-side placeholder matching the device entry (see ctor above).
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2013 
// Return a per-thread address for a compiler-generated ("artificial")
// threadprivate variable identified by \p Name, either as a TLS global or via
// __kmpc_threadprivate_cached when TLS cannot be used.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    // Fast path: mark the global thread_local and use it directly.
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Slow path: route the access through the runtime's threadprivate cache.
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  // The runtime returns an i8*; cast it back to the variable's own type.
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}
2044 
2045 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2046                                    const RegionCodeGenTy &ThenGen,
2047                                    const RegionCodeGenTy &ElseGen) {
2048   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2049 
2050   // If the condition constant folds and can be elided, try to avoid emitting
2051   // the condition and the dead arm of the if/else.
2052   bool CondConstant;
2053   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2054     if (CondConstant)
2055       ThenGen(CGF);
2056     else
2057       ElseGen(CGF);
2058     return;
2059   }
2060 
2061   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2062   // emit the conditional branch.
2063   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2064   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2065   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2066   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2067 
2068   // Emit the 'then' code.
2069   CGF.EmitBlock(ThenBlock);
2070   ThenGen(CGF);
2071   CGF.EmitBranch(ContBlock);
2072   // Emit the 'else' code if present.
2073   // There is no need to emit line number for unconditional branch.
2074   (void)ApplyDebugLocation::CreateEmpty(CGF);
2075   CGF.EmitBlock(ElseBlock);
2076   ElseGen(CGF);
2077   // There is no need to emit line number for unconditional branch.
2078   (void)ApplyDebugLocation::CreateEmpty(CGF);
2079   CGF.EmitBranch(ContBlock);
2080   // Emit the continuation block for code after the if.
2081   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2082 }
2083 
// Emit code for an OpenMP 'parallel' region: a call to __kmpc_fork_call
// running \p OutlinedFn on the team, or — when \p IfCond is present and
// false at runtime — a serialized execution of the outlined function on the
// encountering thread, bracketed by
// __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // Parallel path: hand the outlined function and captured variables to the
  // runtime, which invokes it on every thread of the team.
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serialized path: run the outlined function directly on this thread,
  // inside a __kmpc_serialized_parallel/__kmpc_end_serialized_parallel pair.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    //       handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  // With an 'if' clause emit both arms under a runtime branch; otherwise the
  // parallel path is taken unconditionally.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2154 
2155 // If we're inside an (outlined) parallel region, use the region info's
2156 // thread-ID variable (it is passed in a first argument of the outlined function
2157 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2158 // regular serial code region, get thread ID by calling kmp_int32
2159 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2160 // return the address of that temp.
2161 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2162                                              SourceLocation Loc) {
2163   if (auto *OMPRegionInfo =
2164           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2165     if (OMPRegionInfo->getThreadIDVariable())
2166       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2167 
2168   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2169   QualType Int32Ty =
2170       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2171   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2172   CGF.EmitStoreOfScalar(ThreadID,
2173                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2174 
2175   return ThreadIDTemp;
2176 }
2177 
// Return (creating on first use) a module-level global of type \p Ty with
// the given \p Name. Results are cached in InternalVars so repeated requests
// with the same name yield the same variable. Newly created globals get
// common linkage and a zero initializer.
llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  // Render the (possibly multi-part) twine into a stable string key.
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    // Cache hit: the previously created global must match the requested type.
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  // Cache miss: create the global and remember it. Elem.first() is the map's
  // key string, used here as the global's name.
  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}
2197 
2198 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2199   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2200   std::string Name = getName({Prefix, "var"});
2201   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2202 }
2203 
namespace {
/// Common pre(post)-action for different OpenMP constructs.
///
/// Emits a call to \p EnterCallee before the region body and \p ExitCallee
/// after it. When \p Conditional is set, the Enter call's result is treated
/// as a guard: the body is emitted under an 'omp_if.then' block that is only
/// reached when the call returned non-zero, and the caller must invoke
/// Done() afterwards to emit the 'omp_if.end' join block.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;  // runtime entry for the region
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;   // runtime exit for the region
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  // Join block for the conditional form; set by Enter() iff Conditional.
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      // Only emit the region body when the entry call returned non-zero
      // (e.g. __kmpc_master/__kmpc_single electing this thread).
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  // Emit the join block for the conditional form. Must only be called after
  // Enter() ran with Conditional == true, as it relies on ContBlock being set.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
2242 
// Emit an OpenMP 'critical' region guarded by the named runtime lock, with
// an optional 'hint' clause value forwarded to the runtime.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  // The entry call takes the extra hint argument; the exit call does not, so
  // it reuses the plain three-element Args.
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}
2272 
// Emit an OpenMP 'master' region: the body runs only on the thread for which
// __kmpc_master returns non-zero.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  // Close the conditional region (emits the 'omp_if.end' join block).
  Action.Done(CGF);
}
2295 
// Emit an OpenMP 'masked' region: the body runs only on threads selected by
// the filter expression (default filter is thread 0, matching 'master').
void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  // The end call takes no filter argument.
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  // Close the conditional region (emits the 'omp_if.end' join block).
  Action.Done(CGF);
}
2324 
// Emit an OpenMP 'taskyield' directive, via the OMPBuilder when enabled or a
// direct __kmpc_omp_taskyield call otherwise.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  // Taskyield is a scheduling point for untied tasks: emit the switch that
  // lets an untied task resume here.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
2344 
// Emit an OpenMP 'taskgroup' region bracketed by
// __kmpc_taskgroup / __kmpc_end_taskgroup.
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}
2364 
2365 /// Given an array of pointers to variables, project the address of a
2366 /// given variable.
2367 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2368                                       unsigned Index, const VarDecl *Var) {
2369   // Pull out the pointer to the variable.
2370   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2371   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2372 
2373   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2374   Addr = CGF.Builder.CreateElementBitCast(
2375       Addr, CGF.ConvertTypeForMem(Var->getType()));
2376   return Addr;
2377 }
2378 
/// Synthesize the copy function passed to __kmpc_copyprivate:
///   void copy_func(void *LHSArg, void *RHSArg)
/// Both arguments point to arrays of void* referencing the copyprivate
/// variables; each destination element is assigned from the matching source
/// element using the corresponding expression in \p AssignmentOps.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  // Emit the function body with a fresh CodeGenFunction.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2432 
// Emit an OpenMP 'single' region:
//   if (__kmpc_single(...)) { body; __kmpc_end_single(...); did_it = 1; }
// followed, when copyprivate clauses are present, by a __kmpc_copyprivate
// call that broadcasts the executing thread's values to the rest of the team.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // did_it records (for every thread) whether this thread executed the single
  // region; it is only needed for copyprivate.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  // Close the conditional region (emits the 'omp_if.end' join block).
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2520 
2521 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2522                                         const RegionCodeGenTy &OrderedOpGen,
2523                                         SourceLocation Loc, bool IsThreads) {
2524   if (!CGF.HaveInsertPoint())
2525     return;
2526   // __kmpc_ordered(ident_t *, gtid);
2527   // OrderedOpGen();
2528   // __kmpc_end_ordered(ident_t *, gtid);
2529   // Prepare arguments and build a call to __kmpc_ordered
2530   if (IsThreads) {
2531     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2532     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2533                               CGM.getModule(), OMPRTL___kmpc_ordered),
2534                           Args,
2535                           OMPBuilder.getOrCreateRuntimeFunction(
2536                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2537                           Args);
2538     OrderedOpGen.setAction(Action);
2539     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2540     return;
2541   }
2542   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2543 }
2544 
2545 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2546   unsigned Flags;
2547   if (Kind == OMPD_for)
2548     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2549   else if (Kind == OMPD_sections)
2550     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2551   else if (Kind == OMPD_single)
2552     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2553   else if (Kind == OMPD_barrier)
2554     Flags = OMP_IDENT_BARRIER_EXPL;
2555   else
2556     Flags = OMP_IDENT_BARRIER_IMPL;
2557   return Flags;
2558 }
2559 
// Pick the default schedule/chunk for a worksharing loop. Doacross loops
// (an 'ordered' clause with loop count) force schedule(static, 1); otherwise
// the outputs are left untouched.
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    // Synthesize an unsigned 32-bit literal '1' as the chunk expression.
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}
2577 
// Emit a barrier. Uses the OMPBuilder when enabled; otherwise calls
// __kmpc_barrier, or __kmpc_cancel_barrier inside cancellable regions
// (optionally checking its result to branch to the cancellation exit).
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    // Inside a cancellable region use the cancel-aware barrier so pending
    // cancellations are observed at this synchronization point.
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2627 
2628 /// Map the OpenMP loop schedule to the runtime enumeration.
2629 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2630                                           bool Chunked, bool Ordered) {
2631   switch (ScheduleKind) {
2632   case OMPC_SCHEDULE_static:
2633     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2634                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2635   case OMPC_SCHEDULE_dynamic:
2636     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2637   case OMPC_SCHEDULE_guided:
2638     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2639   case OMPC_SCHEDULE_runtime:
2640     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2641   case OMPC_SCHEDULE_auto:
2642     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2643   case OMPC_SCHEDULE_unknown:
2644     assert(!Chunked && "chunk was specified but schedule kind not known");
2645     return Ordered ? OMP_ord_static : OMP_sch_static;
2646   }
2647   llvm_unreachable("Unexpected runtime schedule");
2648 }
2649 
2650 /// Map the OpenMP distribute schedule to the runtime enumeration.
2651 static OpenMPSchedType
2652 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2653   // only static is allowed for dist_schedule
2654   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2655 }
2656 
2657 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2658                                          bool Chunked) const {
2659   OpenMPSchedType Schedule =
2660       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2661   return Schedule == OMP_sch_static;
2662 }
2663 
2664 bool CGOpenMPRuntime::isStaticNonchunked(
2665     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2666   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2667   return Schedule == OMP_dist_sch_static;
2668 }
2669 
2670 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2671                                       bool Chunked) const {
2672   OpenMPSchedType Schedule =
2673       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2674   return Schedule == OMP_sch_static_chunked;
2675 }
2676 
2677 bool CGOpenMPRuntime::isStaticChunked(
2678     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2679   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2680   return Schedule == OMP_dist_sch_static_chunked;
2681 }
2682 
2683 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2684   OpenMPSchedType Schedule =
2685       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2686   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2687   return Schedule != OMP_sch_static;
2688 }
2689 
2690 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2691                                   OpenMPScheduleClauseModifier M1,
2692                                   OpenMPScheduleClauseModifier M2) {
2693   int Modifier = 0;
2694   switch (M1) {
2695   case OMPC_SCHEDULE_MODIFIER_monotonic:
2696     Modifier = OMP_sch_modifier_monotonic;
2697     break;
2698   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2699     Modifier = OMP_sch_modifier_nonmonotonic;
2700     break;
2701   case OMPC_SCHEDULE_MODIFIER_simd:
2702     if (Schedule == OMP_sch_static_chunked)
2703       Schedule = OMP_sch_static_balanced_chunked;
2704     break;
2705   case OMPC_SCHEDULE_MODIFIER_last:
2706   case OMPC_SCHEDULE_MODIFIER_unknown:
2707     break;
2708   }
2709   switch (M2) {
2710   case OMPC_SCHEDULE_MODIFIER_monotonic:
2711     Modifier = OMP_sch_modifier_monotonic;
2712     break;
2713   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2714     Modifier = OMP_sch_modifier_nonmonotonic;
2715     break;
2716   case OMPC_SCHEDULE_MODIFIER_simd:
2717     if (Schedule == OMP_sch_static_chunked)
2718       Schedule = OMP_sch_static_balanced_chunked;
2719     break;
2720   case OMPC_SCHEDULE_MODIFIER_last:
2721   case OMPC_SCHEDULE_MODIFIER_unknown:
2722     break;
2723   }
2724   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2725   // If the static schedule kind is specified or if the ordered clause is
2726   // specified, and if the nonmonotonic modifier is not specified, the effect is
2727   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2728   // modifier is specified, the effect is as if the nonmonotonic modifier is
2729   // specified.
2730   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2731     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2732           Schedule == OMP_sch_static_balanced_chunked ||
2733           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2734           Schedule == OMP_dist_sch_static_chunked ||
2735           Schedule == OMP_dist_sch_static))
2736       Modifier = OMP_sch_modifier_nonmonotonic;
2737   }
2738   return Schedule | Modifier;
2739 }
2740 
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  // Map the schedule clause (plus chunk/ordered info) onto the runtime's
  // schedule encoding.
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Non-ordered static schedules must use the static-init path, not dynamic
  // dispatch.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
2773 
/// Emit the actual __kmpc_for_static_init call once the schedule has been
/// resolved to one of the static runtime schedule kinds.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  // Ordered loops never take the static-init path; only static (possibly
  // ordered/distribute) schedule kinds are valid here.
  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    // A chunk may only be absent for the non-chunked schedule kinds.
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
2822 
2823 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2824                                         SourceLocation Loc,
2825                                         OpenMPDirectiveKind DKind,
2826                                         const OpenMPScheduleTy &ScheduleKind,
2827                                         const StaticRTInput &Values) {
2828   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2829       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2830   assert(isOpenMPWorksharingDirective(DKind) &&
2831          "Expected loop-based or sections-based directive.");
2832   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2833                                              isOpenMPLoopDirective(DKind)
2834                                                  ? OMP_IDENT_WORK_LOOP
2835                                                  : OMP_IDENT_WORK_SECTIONS);
2836   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2837   llvm::FunctionCallee StaticInitFunction =
2838       createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
2839   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2840   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2841                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2842 }
2843 
2844 void CGOpenMPRuntime::emitDistributeStaticInit(
2845     CodeGenFunction &CGF, SourceLocation Loc,
2846     OpenMPDistScheduleClauseKind SchedKind,
2847     const CGOpenMPRuntime::StaticRTInput &Values) {
2848   OpenMPSchedType ScheduleNum =
2849       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2850   llvm::Value *UpdatedLocation =
2851       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2852   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2853   llvm::FunctionCallee StaticInitFunction;
2854   bool isGPUDistribute =
2855       CGM.getLangOpts().OpenMPIsDevice &&
2856       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2857   StaticInitFunction = createForStaticInitFunction(
2858       Values.IVSize, Values.IVSigned, isGPUDistribute);
2859 
2860   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2861                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2862                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2863 }
2864 
2865 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2866                                           SourceLocation Loc,
2867                                           OpenMPDirectiveKind DKind) {
2868   if (!CGF.HaveInsertPoint())
2869     return;
2870   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2871   llvm::Value *Args[] = {
2872       emitUpdateLocation(CGF, Loc,
2873                          isOpenMPDistributeDirective(DKind)
2874                              ? OMP_IDENT_WORK_DISTRIBUTE
2875                              : isOpenMPLoopDirective(DKind)
2876                                    ? OMP_IDENT_WORK_LOOP
2877                                    : OMP_IDENT_WORK_SECTIONS),
2878       getThreadID(CGF, Loc)};
2879   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2880   if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2881       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2882     CGF.EmitRuntimeCall(
2883         OMPBuilder.getOrCreateRuntimeFunction(
2884             CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2885         Args);
2886   else
2887     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2888                             CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2889                         Args);
2890 }
2891 
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Notify the runtime that the current ordered iteration is complete.
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}
2902 
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  // The runtime returns a kmp_int32; convert it to the bool value the loop
  // codegen expects.
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}
2926 
2927 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2928                                            llvm::Value *NumThreads,
2929                                            SourceLocation Loc) {
2930   if (!CGF.HaveInsertPoint())
2931     return;
2932   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2933   llvm::Value *Args[] = {
2934       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2935       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2936   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2937                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2938                       Args);
2939 }
2940 
2941 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2942                                          ProcBindKind ProcBind,
2943                                          SourceLocation Loc) {
2944   if (!CGF.HaveInsertPoint())
2945     return;
2946   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2947   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2948   llvm::Value *Args[] = {
2949       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2950       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2951   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2952                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2953                       Args);
2954 }
2955 
2956 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2957                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2958   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2959     OMPBuilder.createFlush(CGF.Builder);
2960   } else {
2961     if (!CGF.HaveInsertPoint())
2962       return;
2963     // Build call void __kmpc_flush(ident_t *loc)
2964     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2965                             CGM.getModule(), OMPRTL___kmpc_flush),
2966                         emitUpdateLocation(CGF, Loc));
2967   }
2968 }
2969 
namespace {
/// Indexes of fields for type kmp_task_t.
/// NOTE(review): the enumerator order appears to be assumed to match the
/// field order of the kmp_task_t record built by the task codegen — confirm
/// before reordering.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2995 
/// Return true if no target region or device global variable entries have
/// been registered for offloading.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
  return OffloadEntriesTargetRegion.empty() &&
         OffloadEntriesDeviceGlobalVar.empty();
}
3000 
/// Initialize target region entry.
/// Creates a placeholder entry (no address or ID yet) at position \p Order;
/// only meaningful during device code generation.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}
3014 
/// Register the address and ID of an emitted target region with the entry
/// created for it (device) or create a brand-new entry (host).
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
      return;
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    // On the host, ignore a re-registration of a target region entry whose
    // address/ID were already recorded.
    if (Flags ==
            OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
        hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
                                 /*IgnoreAddressId*/ true))
      return;
    assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
           "Target region entry already registered!");
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}
3044 
3045 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3046     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3047     bool IgnoreAddressId) const {
3048   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3049   if (PerDevice == OffloadEntriesTargetRegion.end())
3050     return false;
3051   auto PerFile = PerDevice->second.find(FileID);
3052   if (PerFile == PerDevice->second.end())
3053     return false;
3054   auto PerParentName = PerFile->second.find(ParentName);
3055   if (PerParentName == PerFile->second.end())
3056     return false;
3057   auto PerLine = PerParentName->second.find(LineNum);
3058   if (PerLine == PerParentName->second.end())
3059     return false;
3060   // Fail if this entry is already registered.
3061   if (!IgnoreAddressId &&
3062       (PerLine->second.getAddress() || PerLine->second.getID()))
3063     return false;
3064   return true;
3065 }
3066 
3067 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3068     const OffloadTargetRegionEntryInfoActTy &Action) {
3069   // Scan all target region entries and perform the provided action.
3070   for (const auto &D : OffloadEntriesTargetRegion)
3071     for (const auto &F : D.second)
3072       for (const auto &P : F.second)
3073         for (const auto &L : P.second)
3074           Action(D.first, F.first, P.first(), L.first, L.second);
3075 }
3076 
/// Initialize a device global variable entry.
/// Creates a placeholder entry (no address or size yet) at position \p Order;
/// only meaningful during device code generation.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}
3087 
3088 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3089     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3090                                      CharUnits VarSize,
3091                                      OMPTargetGlobalVarEntryKind Flags,
3092                                      llvm::GlobalValue::LinkageTypes Linkage) {
3093   if (CGM.getLangOpts().OpenMPIsDevice) {
3094     // This could happen if the device compilation is invoked standalone.
3095     if (!hasDeviceGlobalVarEntryInfo(VarName))
3096       return;
3097     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3098     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3099       if (Entry.getVarSize().isZero()) {
3100         Entry.setVarSize(VarSize);
3101         Entry.setLinkage(Linkage);
3102       }
3103       return;
3104     }
3105     Entry.setVarSize(VarSize);
3106     Entry.setLinkage(Linkage);
3107     Entry.setAddress(Addr);
3108   } else {
3109     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3110       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3111       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3112              "Entry not initialized!");
3113       if (Entry.getVarSize().isZero()) {
3114         Entry.setVarSize(VarSize);
3115         Entry.setLinkage(Linkage);
3116       }
3117       return;
3118     }
3119     OffloadEntriesDeviceGlobalVar.try_emplace(
3120         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3121     ++OffloadingEntriesNum;
3122   }
3123 }
3124 
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    actOnDeviceGlobalVarEntriesInfo(
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all device global variable entries and perform the provided action.
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}
3132 
/// Emit one __tgt_offload_entry global describing an offloading entry, placed
/// in the "omp_offloading_entries" section for the runtime/linker to collect.
void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // Fields must match the __tgt_offload_entry layout produced by
  // getTgtOffloadEntryQTy(): addr, name, size, flags, reserved.
  llvm::Constant *Data[] = {
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
      llvm::ConstantInt::get(CGM.SizeTy, Size),
      llvm::ConstantInt::get(CGM.Int32Ty, Flags),
      llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection("omp_offloading_entries");
}
3163 
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for function that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries indexed by their creation order so the host and device sides
  // agree on the ordering.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Recover the source location from the device/file IDs so later
        // diagnostics can point at the original target region.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Emit the actual offload entry globals (and diagnose incomplete entries)
  // in creation order.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // With unified shared memory on the device, 'to' entries are skipped
        // here.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3337 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR bitcode into a throwaway context; only its named
  // metadata is consumed.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to decode the node operands; the layout matches the one written
    // by createOffloadEntriesAndInfoMetadata().
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
3406 
3407 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3408   if (!KmpRoutineEntryPtrTy) {
3409     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3410     ASTContext &C = CGM.getContext();
3411     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3412     FunctionProtoType::ExtProtoInfo EPI;
3413     KmpRoutineEntryPtrQTy = C.getPointerType(
3414         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3415     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3416   }
3417 }
3418 
3419 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3420   // Make sure the type of the entry is already created. This is the type we
3421   // have to create:
3422   // struct __tgt_offload_entry{
3423   //   void      *addr;       // Pointer to the offload entry info.
3424   //                          // (function or global)
3425   //   char      *name;       // Name of the function or global.
3426   //   size_t     size;       // Size of the entry info (0 if it a function).
3427   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3428   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3429   // };
3430   if (TgtOffloadEntryQTy.isNull()) {
3431     ASTContext &C = CGM.getContext();
3432     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3433     RD->startDefinition();
3434     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3435     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3436     addFieldToRecordDecl(C, RD, C.getSizeType());
3437     addFieldToRecordDecl(
3438         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3439     addFieldToRecordDecl(
3440         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3441     RD->completeDefinition();
3442     RD->addAttr(PackedAttr::CreateImplicit(C));
3443     TgtOffloadEntryQTy = C.getRecordType(RD);
3444   }
3445   return TgtOffloadEntryQTy;
3446 }
3447 
3448 namespace {
3449 struct PrivateHelpersTy {
3450   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3451                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3452       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3453         PrivateElemInit(PrivateElemInit) {}
3454   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3455   const Expr *OriginalRef = nullptr;
3456   const VarDecl *Original = nullptr;
3457   const VarDecl *PrivateCopy = nullptr;
3458   const VarDecl *PrivateElemInit = nullptr;
3459   bool isLocalPrivate() const {
3460     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3461   }
3462 };
3463 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3464 } // anonymous namespace
3465 
3466 static bool isAllocatableDecl(const VarDecl *VD) {
3467   const VarDecl *CVD = VD->getCanonicalDecl();
3468   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3469     return false;
3470   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3471   // Use the default allocation.
3472   return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
3473             AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
3474            !AA->getAllocator());
3475 }
3476 
3477 static RecordDecl *
3478 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3479   if (!Privates.empty()) {
3480     ASTContext &C = CGM.getContext();
3481     // Build struct .kmp_privates_t. {
3482     //         /*  private vars  */
3483     //       };
3484     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3485     RD->startDefinition();
3486     for (const auto &Pair : Privates) {
3487       const VarDecl *VD = Pair.second.Original;
3488       QualType Type = VD->getType().getNonReferenceType();
3489       // If the private variable is a local variable with lvalue ref type,
3490       // allocate the pointer instead of the pointee type.
3491       if (Pair.second.isLocalPrivate()) {
3492         if (VD->getType()->isLValueReferenceType())
3493           Type = C.getPointerType(Type);
3494         if (isAllocatableDecl(VD))
3495           Type = C.getPointerType(Type);
3496       }
3497       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3498       if (VD->hasAttrs()) {
3499         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3500              E(VD->getAttrs().end());
3501              I != E; ++I)
3502           FD->addAttr(*I);
3503       }
3504     }
3505     RD->completeDefinition();
3506     return RD;
3507   }
3508   return nullptr;
3509 }
3510 
3511 static RecordDecl *
3512 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3513                          QualType KmpInt32Ty,
3514                          QualType KmpRoutineEntryPointerQTy) {
3515   ASTContext &C = CGM.getContext();
3516   // Build struct kmp_task_t {
3517   //         void *              shareds;
3518   //         kmp_routine_entry_t routine;
3519   //         kmp_int32           part_id;
3520   //         kmp_cmplrdata_t data1;
3521   //         kmp_cmplrdata_t data2;
3522   // For taskloops additional fields:
3523   //         kmp_uint64          lb;
3524   //         kmp_uint64          ub;
3525   //         kmp_int64           st;
3526   //         kmp_int32           liter;
3527   //         void *              reductions;
3528   //       };
3529   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3530   UD->startDefinition();
3531   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3532   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3533   UD->completeDefinition();
3534   QualType KmpCmplrdataTy = C.getRecordType(UD);
3535   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3536   RD->startDefinition();
3537   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3538   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3539   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3540   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3541   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3542   if (isOpenMPTaskLoopDirective(Kind)) {
3543     QualType KmpUInt64Ty =
3544         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3545     QualType KmpInt64Ty =
3546         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3547     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3548     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3549     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3550     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3551     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3552   }
3553   RD->completeDefinition();
3554   return RD;
3555 }
3556 
3557 static RecordDecl *
3558 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3559                                      ArrayRef<PrivateDataTy> Privates) {
3560   ASTContext &C = CGM.getContext();
3561   // Build struct kmp_task_t_with_privates {
3562   //         kmp_task_t task_data;
3563   //         .kmp_privates_t. privates;
3564   //       };
3565   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3566   RD->startDefinition();
3567   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3568   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3569     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3570   RD->completeDefinition();
3571   return RD;
3572 }
3573 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
/// \param TaskFunction The outlined task body to forward to.
/// \param TaskPrivatesMap The privates-mapping function passed through to the
///        task body (may be a null pointer constant when unused by callers).
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Build the (kmp_int32 gtid, kmp_task_t_with_privates *restrict tt)
  // parameter list expected by the runtime for a task entry point.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // Dereference the restrict-qualified task argument; Base is the embedded
  // kmp_task_t (first field of kmp_task_t_with_privates).
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address, not by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load the shareds pointer and cast it to the type the task body expects.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates record, when present, is the second field of
  // kmp_task_t_with_privates; otherwise pass a null pointer.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloop entry points additionally forward lb, ub, st, liter and the
  // reductions pointer loaded from the task descriptor.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // return 0;
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3688 
/// Emit the task destructor thunk that runs destructors for the fields of the
/// embedded privates record:
/// \code
/// kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
///   // destroy every field of tt->privates that has a destruction kind
/// }
/// \endcode
/// NOTE(review): the thunk is given a kmp_int32 return type but no explicit
/// return value is stored before FinishFunction — presumably the runtime
/// ignores the result; confirm against the libomp task-destructor contract.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  // Same (gtid, task pointer) signature as the task entry thunk.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  // Dereference the task pointer and step to the privates record, which is
  // the field after the kmp_task_t header.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Push a destroy cleanup for each privates field that needs destruction.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3737 
3738 /// Emit a privates mapping function for correct handling of private and
3739 /// firstprivate variables.
3740 /// \code
3741 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3742 /// **noalias priv1,...,  <tyn> **noalias privn) {
3743 ///   *priv1 = &.privates.priv1;
3744 ///   ...;
3745 ///   *privn = &.privates.privn;
3746 /// }
3747 /// \endcode
3748 static llvm::Value *
3749 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3750                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3751                                ArrayRef<PrivateDataTy> Privates) {
3752   ASTContext &C = CGM.getContext();
3753   FunctionArgList Args;
3754   ImplicitParamDecl TaskPrivatesArg(
3755       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3756       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3757       ImplicitParamDecl::Other);
3758   Args.push_back(&TaskPrivatesArg);
3759   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3760   unsigned Counter = 1;
3761   for (const Expr *E : Data.PrivateVars) {
3762     Args.push_back(ImplicitParamDecl::Create(
3763         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3764         C.getPointerType(C.getPointerType(E->getType()))
3765             .withConst()
3766             .withRestrict(),
3767         ImplicitParamDecl::Other));
3768     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3769     PrivateVarsPos[VD] = Counter;
3770     ++Counter;
3771   }
3772   for (const Expr *E : Data.FirstprivateVars) {
3773     Args.push_back(ImplicitParamDecl::Create(
3774         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3775         C.getPointerType(C.getPointerType(E->getType()))
3776             .withConst()
3777             .withRestrict(),
3778         ImplicitParamDecl::Other));
3779     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3780     PrivateVarsPos[VD] = Counter;
3781     ++Counter;
3782   }
3783   for (const Expr *E : Data.LastprivateVars) {
3784     Args.push_back(ImplicitParamDecl::Create(
3785         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3786         C.getPointerType(C.getPointerType(E->getType()))
3787             .withConst()
3788             .withRestrict(),
3789         ImplicitParamDecl::Other));
3790     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3791     PrivateVarsPos[VD] = Counter;
3792     ++Counter;
3793   }
3794   for (const VarDecl *VD : Data.PrivateLocals) {
3795     QualType Ty = VD->getType().getNonReferenceType();
3796     if (VD->getType()->isLValueReferenceType())
3797       Ty = C.getPointerType(Ty);
3798     if (isAllocatableDecl(VD))
3799       Ty = C.getPointerType(Ty);
3800     Args.push_back(ImplicitParamDecl::Create(
3801         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3802         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3803         ImplicitParamDecl::Other));
3804     PrivateVarsPos[VD] = Counter;
3805     ++Counter;
3806   }
3807   const auto &TaskPrivatesMapFnInfo =
3808       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3809   llvm::FunctionType *TaskPrivatesMapTy =
3810       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3811   std::string Name =
3812       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3813   auto *TaskPrivatesMap = llvm::Function::Create(
3814       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3815       &CGM.getModule());
3816   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3817                                     TaskPrivatesMapFnInfo);
3818   if (CGM.getLangOpts().Optimize) {
3819     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3820     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3821     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3822   }
3823   CodeGenFunction CGF(CGM);
3824   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3825                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3826 
3827   // *privi = &.privates.privi;
3828   LValue Base = CGF.EmitLoadOfPointerLValue(
3829       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3830       TaskPrivatesArg.getType()->castAs<PointerType>());
3831   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3832   Counter = 0;
3833   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3834     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3835     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3836     LValue RefLVal =
3837         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3838     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3839         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3840     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3841     ++Counter;
3842   }
3843   CGF.FinishFunction();
3844   return TaskPrivatesMap;
3845 }
3846 
/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Pointer to the captured shareds used as the source
///        for firstprivate copies (may be invalid when not needed).
/// \param TDBase Base lvalue of the kmp_task_t_with_privates object.
/// \param ForDup true when called from the task_dup function (taskloops),
///        false for the initial task allocation.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the field after the kmp_task_t header.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the privates record's fields in lock step with Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the dup function only non-trivial constructor initializers need to
    // be re-run; everything else was handled at allocation time.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // In the dup function read the source through SrcBase with the
          // declaration's natural alignment.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar firstprivate: bind the element-init variable to the shared
          // value, then run the initializer expression for the private copy.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private variable: just run its default initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3968 
3969 /// Check if duplication function is required for taskloops.
3970 static bool checkInitIsRequired(CodeGenFunction &CGF,
3971                                 ArrayRef<PrivateDataTy> Privates) {
3972   bool InitRequired = false;
3973   for (const PrivateDataTy &Pair : Privates) {
3974     if (Pair.second.isLocalPrivate())
3975       continue;
3976     const VarDecl *VD = Pair.second.PrivateCopy;
3977     const Expr *Init = VD->getAnyInitializer();
3978     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3979                                     !CGF.isTrivialInitializer(Init));
3980     if (InitRequired)
3981       break;
3982   }
3983   return InitRequired;
3984 }
3985 
3986 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
/// \param WithLastIter true when the destination task's 'liter' field must be
///        set from the lastpriv argument.
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Signature: void(kmp_task_t *task_dst, kmp_task_t *task_src, int lastpriv).
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  // Firstprivates are copied from the *source* task's shareds, so load that
  // pointer from task_src.
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4065 
4066 /// Checks if destructor function is required to be generated.
4067 /// \return true if cleanups are required, false otherwise.
4068 static bool
4069 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4070                          ArrayRef<PrivateDataTy> Privates) {
4071   for (const PrivateDataTy &P : Privates) {
4072     if (P.second.isLocalPrivate())
4073       continue;
4074     QualType Ty = P.second.Original->getType().getNonReferenceType();
4075     if (Ty.isDestructedType())
4076       return true;
4077   }
4078   return false;
4079 }
4080 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// RAII scope: the constructor privatizes each iterator variable and emits
/// the loop headers (counter init, bounds check, body entry and the iterator
/// update); the destructor closes the loops in reverse order (counter
/// increment, back-branch, exit block). Code emitted between construction and
/// destruction therefore runs once per point of the iteration space.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator continue/exit jump destinations, indexed like the iterators.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  // \p E may be null, in which case the scope is a no-op.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      // Upper bounds are evaluated once, before entering any of the loops.
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Comparison signedness follows the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  // Emits the loop latches/exits, innermost loop first.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4159 
4160 static std::pair<llvm::Value *, llvm::Value *>
4161 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4162   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4163   llvm::Value *Addr;
4164   if (OASE) {
4165     const Expr *Base = OASE->getBase();
4166     Addr = CGF.EmitScalarExpr(Base);
4167   } else {
4168     Addr = CGF.EmitLValue(E).getPointer(CGF);
4169   }
4170   llvm::Value *SizeVal;
4171   QualType Ty = E->getType();
4172   if (OASE) {
4173     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4174     for (const Expr *SE : OASE->getDimensions()) {
4175       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4176       Sz = CGF.EmitScalarConversion(
4177           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4178       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4179     }
4180   } else if (const auto *ASE =
4181                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4182     LValue UpAddrLVal =
4183         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4184     Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
4185     llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
4186         UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
4187     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4188     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4189     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4190   } else {
4191     SizeVal = CGF.getTypeSize(Ty);
4192   }
4193   return std::make_pair(Addr, SizeVal);
4194 }
4195 
4196 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4197 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4198   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4199   if (KmpTaskAffinityInfoTy.isNull()) {
4200     RecordDecl *KmpAffinityInfoRD =
4201         C.buildImplicitRecord("kmp_task_affinity_info_t");
4202     KmpAffinityInfoRD->startDefinition();
4203     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4204     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4205     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4206     KmpAffinityInfoRD->completeDefinition();
4207     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4208   }
4209 }
4210 
// Emits the task record allocation and initialization for a task/taskloop/
// target directive: collects privates, builds the kmp_task_t_with_privates
// record, calls __kmpc_omp_task_alloc (or the target variant), copies shareds,
// initializes privates, and fills destructor/priority fields.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally record the init expression used to copy the
  // original value into the private copy.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Allocatable locals are stored via a pointer, so use pointer alignment.
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Sort by decreasing alignment to minimize padding in the privates record;
  // stable so equally-aligned privates keep their declaration order.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use an
  // extended record (with bounds/stride fields), cached separately.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map parameter is the 4th argument of the outlined task
  // function; reuse its type for the cast below.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null map to the task entry.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // final(<expr>) may be a runtime value (pointer set) or a compile-time
  // constant (int bit of the PointerIntPair).
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    // nowait target tasks use the device-aware allocation entry point.
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // Iterator modifiers contribute a runtime-computed count (product of the
    // iterator upper bounds); plain items are counted statically.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime count: total = NumAffinities + iterator-derived count, and the
      // storage is a VLA of kmp_task_affinity_info_t.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Static count: a constant-sized local array suffices.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Iterator-driven items are indexed through a runtime counter starting
    // after the statically filled prefix.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      // The scope emits the iterator loops; the stores below run per
      // iteration point.
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getElementType(),
                                          AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops that need per-chunk re-initialization also get a task-dup
    // helper the runtime calls when splitting the loop into tasks.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4601 
namespace {
/// Dependence kind for RTL. Values presumably must match the dependence flag
/// encoding used by the OpenMP runtime (kmp_depend_info flags) — verify
/// against openmp/runtime/src/kmp.h when changing.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4612 
4613 /// Translates internal dependency kind into the runtime kind.
4614 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4615   RTLDependenceKindTy DepKind;
4616   switch (K) {
4617   case OMPC_DEPEND_in:
4618     DepKind = DepIn;
4619     break;
4620   // Out and InOut dependencies must use the same code.
4621   case OMPC_DEPEND_out:
4622   case OMPC_DEPEND_inout:
4623     DepKind = DepInOut;
4624     break;
4625   case OMPC_DEPEND_mutexinoutset:
4626     DepKind = DepMutexInOutSet;
4627     break;
4628   case OMPC_DEPEND_source:
4629   case OMPC_DEPEND_sink:
4630   case OMPC_DEPEND_depobj:
4631   case OMPC_DEPEND_unknown:
4632     llvm_unreachable("Unknown task dependence type");
4633   }
4634   return DepKind;
4635 }
4636 
4637 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4638 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4639                            QualType &FlagsTy) {
4640   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4641   if (KmpDependInfoTy.isNull()) {
4642     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4643     KmpDependInfoRD->startDefinition();
4644     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4645     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4646     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4647     KmpDependInfoRD->completeDefinition();
4648     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4649   }
4650 }
4651 
// Returns the number of dependencies stored in a depobj and an lvalue for the
// first kmp_depend_info element. The count is read from the element at index
// -1 of the array (its base_addr field holds the dependency count).
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // The depobj variable holds a void* pointing at the dependency array.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[-1] holds the bookkeeping element with the dependency count.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4680 
// Fills entries of \p DependenciesArray, starting at position \p Pos, with
// one kmp_depend_info record (base_addr, len, flags) per dependency
// expression in \p Data. \p Pos is either a compile-time counter
// (unsigned *) when the number of emitted records is statically known, or a
// runtime counter lvalue (LValue *) when an iterator expression makes the
// count dynamic; it is advanced past the written records in either case.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // If the dependencies are produced by an iterator, emit the iterator loop
  // around the body below; otherwise the scope is a no-op.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Static position: index with a constant GEP.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Dynamic position: load the runtime counter and index with it.
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
                                        DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the output position (compile-time or runtime form).
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4740 
// Returns, for each depobj expression in \p Data, a runtime value holding
// the number of dependency elements stored in that depobj. Accumulation is
// done through a zero-initialized temporary so that, when an iterator
// expression wraps the depobj list, the per-iteration counts are summed
// inside the generated iterator loop.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    // Emit the iterator loop (if any) around the per-depobj counting code.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // The element count lives in the base_addr field of the header record
      // at index -1 of the depobj array.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getElementType(), Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Accumulate into a stack temporary initialized to 0 at the alloca
      // point, so repeated executions under an iterator loop sum up.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Load the final totals after the iterator scope has closed.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4798 
// Copies the dependency records stored in each depobj of \p Data into
// \p DependenciesArray at the runtime position tracked by \p PosLVal,
// advancing the position by the number of records copied from each depobj.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  // Size in bytes of one kmp_depend_info record, used to scale the memcpy.
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    // Emit the iterator loop (if any) around the per-depobj copy.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      // (Stored in the base_addr field of the header record at index -1.)
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getElementType(), Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
                                        DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4860 
// Emits the kmp_depend_info array for a 'depend' clause and returns the
// (number of elements, array address) pair to be passed to the runtime.
// Three categories of dependencies are materialized, in this order:
//   1. plain dependencies with a statically known count,
//   2. dependencies produced by iterator expressions (dynamic count),
//   3. contents of depobj objects (copied wholesale from their arrays).
// When either of the last two is present, the array size is only known at
// runtime and a VLA is emitted; otherwise a constant-sized temporary is
// used.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  // No dependency expressions at all -> nothing to emit.
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Statically countable dependencies only: skip depobj entries and entries
  // driven by iterators (both contribute runtime-computed counts below).
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Starts at 1 because the iterator upper bounds are multiplied in.
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 1);
  // Calculate number of depobj dependecies and regular deps with the iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.
    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Total size is only known at runtime: static count + depobj sizes +
    // iterator-expanded count.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    // Wrap the runtime count in an OpaqueValueExpr so it can serve as the
    // size expression of a variable-length array type.
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // Fully static count: a constant-sized stack temporary suffices.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // 1. Regular dependencies without iterators: static positions.
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependecies with iterators.
  // Switch to a runtime position counter seeded with the static count.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy);
  return std::make_pair(NumOfElements, DependenciesArray);
}
4981 
// Emits the heap-allocated kmp_depend_info array backing a 'depobj'
// construct. The array is allocated via __kmpc_alloc with one extra leading
// element whose base_addr field records the number of dependencies (needed
// later by depobj(x) update/destroy handling); the returned address points
// past that header, at the first real dependency record.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Iterator case: element count = product of iterator upper bounds,
    // computed at runtime.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the header element holding the count.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Static case: byte size computed from a constant array type of
    // NumDependencies + 1 (header) elements.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Records start at index 1, after the header. Use a runtime counter only
  // when an iterator expression makes the count dynamic.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return a pointer past the header element, cast to void*.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}
5064 
// Emits the code for 'destroy' on a depobj: frees the array allocated by
// emitDepobjDependClause via __kmpc_free. The stored pointer points past
// the header element, so step back one element to reach the allocation
// start before freeing.
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  // Rewind past the count-holding header element to the allocation base.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}
5091 
// Emits the code for 'update' on a depobj: loops over every dependency
// record in the depobj array and rewrites its flags field with the new
// dependency kind. The loop is an unconditional-entry while-do over a PHI'd
// element pointer.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // Current element pointer: Begin on entry, ElementNext on back-edges.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  // Loop until the advanced pointer reaches the end of the array.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5138 
// Emits the runtime calls for a 'task' directive: allocates/initializes the
// task via emitTaskInit, materializes its dependency array (if any), and
// then either enqueues the task (__kmpc_omp_task[_with_deps]) or — under a
// false 'if' clause — executes it immediately and serially between
// __kmpc_omp_task_begin_if0/__kmpc_omp_task_complete_if0, waiting on the
// dependencies first.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'if' clause true (or absent): enqueue the task for deferred execution.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks start at part id 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // 'if' clause false: run the task body immediately on this thread.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5256 
// Emits the runtime call for a 'taskloop' directive: allocates the task via
// emitTaskInit, stores the loop bounds/stride and reduction data into the
// kmp_task_t record, and calls __kmpc_taskloop with the 'if', schedule
// (grainsize/num_tasks) and task-dup arguments.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // if_val: evaluate the 'if' clause condition, defaulting to 1 (true).
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lower bound field of the task record.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the upper bound field of the task record.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the stride field of the task record.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    // No reductions: null out the field.
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Schedule kinds accepted by __kmpc_taskloop's 'sched' argument.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      // sched: Data.Schedule's int discriminates num_tasks vs grainsize.
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      // grainsize/num_tasks value, or 0 when no schedule was given.
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
5342 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr, EExpr, UpExpr Optional expressions forwarded verbatim to
/// \p RedOpGen on every iteration (used by the atomic-reduction path).
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely when the array section is empty.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes track the current source/destination element pointers; the
  // back-edge incoming values are wired up after the body is emitted.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current element addresses so the
  // generated reduction operation applies to a single element.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5425 
5426 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5427 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5428 /// UDR combiner function.
5429 static void emitReductionCombiner(CodeGenFunction &CGF,
5430                                   const Expr *ReductionOp) {
5431   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5432     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5433       if (const auto *DRE =
5434               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5435         if (const auto *DRD =
5436                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5437           std::pair<llvm::Function *, llvm::Function *> Reduction =
5438               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5439           RValue Func = RValue::get(Reduction.first);
5440           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5441           CGF.EmitIgnoredExpr(ReductionOp);
5442           return;
5443         }
5444   CGF.EmitIgnoredExpr(ReductionOp);
5445 }
5446 
/// Emits the outlined function passed to __kmpc_reduce{_nowait}:
///   void reduction_func(void *LHSArg, void *RHSArg);
/// Both arguments point to arrays of void* holding the addresses of the
/// reduction items (with extra slots carrying the sizes of VLA items), and
/// the function applies each reduction operation element-wise, storing the
/// combined result back through the LHS pointers.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap every LHS/RHS variable to the corresponding slot in the arrays so
  // the reduction ops below operate on the caller-provided storage. Idx can
  // run ahead of I because VLA items consume an extra slot for their size.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // The size was stored (as a pointer-sized integer) in the next slot;
      // map it onto the VLA's size expression before emitting the type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5538 
5539 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5540                                                   const Expr *ReductionOp,
5541                                                   const Expr *PrivateRef,
5542                                                   const DeclRefExpr *LHS,
5543                                                   const DeclRefExpr *RHS) {
5544   if (PrivateRef->getType()->isArrayType()) {
5545     // Emit reduction for array section.
5546     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5547     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5548     EmitOMPAggregateReduction(
5549         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5550         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5551           emitReductionCombiner(CGF, ReductionOp);
5552         });
5553   } else {
5554     // Emit reduction for array subscript or single variable.
5555     emitReductionCombiner(CGF, ReductionOp);
5556   }
5557 }
5558 
/// Emits the full reduction finalization sequence: builds the RedList array
/// of item addresses, the outlined reduce_func, the static lock, and the
/// __kmpc_reduce{_nowait} call followed by the tree-reduce (case 1) and
/// atomic-reduce (case 2) arms. With Options.SimpleReduction only the plain
/// combiners are emitted, with no runtime calls.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Reduction ops produced by sema have the form 'x = <update expr>';
      // decompose that so the update can be attempted as a simple atomic.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Fallback for ops the hardware can't do atomically: stage
                // the loaded value in a temporary remapped over VD, then
                // re-evaluate the update expression against it.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5865 
5866 /// Generates unique name for artificial threadprivate variables.
5867 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5868 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5869                                       const Expr *Ref) {
5870   SmallString<256> Buffer;
5871   llvm::raw_svector_ostream Out(Buffer);
5872   const clang::DeclRefExpr *DE;
5873   const VarDecl *D = ::getBaseDecl(Ref, DE);
5874   if (!D)
5875     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5876   D = D->getCanonicalDecl();
5877   std::string Name = CGM.getOpenMPRuntime().getName(
5878       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5879   Out << Prefix << Name << "_"
5880       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5881   return std::string(Out.str());
5882 }
5883 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
/// \param RCG Reduction codegen helper describing all reduction items.
/// \param N Index of the reduction item this initializer is emitted for.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // No UDR initializer: pass a null 'omp_orig' address.
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5952 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
/// \param RCG Reduction codegen helper describing all reduction items.
/// \param N Index of the reduction item this combiner is emitted for.
/// \param ReductionOp Combiner expression (in terms of \p LHS and \p RHS).
/// \param LHS, RHS DeclRefExprs for the in/out and in operands.
/// \param PrivateRef Private copy expression; its type selects scalar vs
/// array-section combining.
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
6030 
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// Returns nullptr when the N-th reduction item needs no cleanups, so the
/// caller can pass a null finalizer to the runtime.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // Trivially-destructible items need no finalizer at all.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  // Single void* parameter: the private copy to be destroyed.
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %0 = load the item pointer from the void* argument.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
6079 
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  // Nothing to emit without an insertion point or without reduction items.
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill in one kmp_taskred_input_t element per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    // ElemLVal.reduce_size = <size of the item in chars>;
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    // A null finalizer is stored for trivially-destructible items.
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      // flags = 1 marks items that need delayed creation (see note above).
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6208 
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
  // int gtid, int is_ws);
  // (The old comment mentioned __kmpc_taskred_modifier_init; the call below is
  // the matching finalization entry point.)
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}
6226 
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type size is non-constant,
  // i.e. Sizes.second is non-null. The stored value is later read by the
  // init/comb/fini functions, which cannot receive it as a parameter.
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}
6243 
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  // Wrap the runtime result in an Address, reusing the shared item's
  // alignment for the returned pointer.
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      SharedLVal.getAlignment());
}
6263 
6264 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6265                                        SourceLocation Loc) {
6266   if (!CGF.HaveInsertPoint())
6267     return;
6268 
6269   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6270     OMPBuilder.createTaskwait(CGF.Builder);
6271   } else {
6272     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6273     // global_tid);
6274     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6275     // Ignore return result until untied tasks are supported.
6276     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6277                             CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6278                         Args);
6279   }
6280 
6281   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6282     Region->emitUntiedSwitch(CGF);
6283 }
6284 
6285 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6286                                            OpenMPDirectiveKind InnerKind,
6287                                            const RegionCodeGenTy &CodeGen,
6288                                            bool HasCancel) {
6289   if (!CGF.HaveInsertPoint())
6290     return;
6291   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6292                                  InnerKind != OMPD_critical &&
6293                                      InnerKind != OMPD_master &&
6294                                      InnerKind != OMPD_masked);
6295   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6296 }
6297 
namespace {
/// Cancellation kind values passed as the 'cncl_kind' argument to the
/// __kmpc_cancel / __kmpc_cancellationpoint runtime entry points (see
/// getCancellationKind below).
enum RTCancelKind {
  CancelNoreq = 0,      // no cancellation requested
  CancelParallel = 1,   // cancel a 'parallel' region
  CancelLoop = 2,       // cancel a 'for' (loop) region
  CancelSections = 3,   // cancel a 'sections' region
  CancelTaskgroup = 4   // cancel a 'taskgroup' region
};
} // anonymous namespace
6307 
6308 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6309   RTCancelKind CancelKind = CancelNoreq;
6310   if (CancelRegion == OMPD_parallel)
6311     CancelKind = CancelParallel;
6312   else if (CancelRegion == OMPD_for)
6313     CancelKind = CancelLoop;
6314   else if (CancelRegion == OMPD_sections)
6315     CancelKind = CancelSections;
6316   else {
6317     assert(CancelRegion == OMPD_taskgroup);
6318     CancelKind = CancelTaskgroup;
6319   }
6320   return CancelKind;
6321 }
6322 
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      // Non-zero result means cancellation was requested somewhere.
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // Parallel cancellation must pass through a cancel barrier first.
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6362 
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // The 'then' code: emitted unconditionally, or guarded by the 'if' clause
    // condition when one is present (see emitIfClause below).
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      // Non-zero result means cancellation was requested.
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // Parallel cancellation must pass through a cancel barrier first.
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // 'if' clause present: cancel only when the condition is true; the
      // else-branch emits nothing.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6408 
6409 namespace {
6410 /// Cleanup action for uses_allocators support.
6411 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6412   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6413 
6414 public:
6415   OMPUsesAllocatorsActionTy(
6416       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6417       : Allocators(Allocators) {}
6418   void Enter(CodeGenFunction &CGF) override {
6419     if (!CGF.HaveInsertPoint())
6420       return;
6421     for (const auto &AllocatorData : Allocators) {
6422       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6423           CGF, AllocatorData.first, AllocatorData.second);
6424     }
6425   }
6426   void Exit(CodeGenFunction &CGF) override {
6427     if (!CGF.HaveInsertPoint())
6428       return;
6429     for (const auto &AllocatorData : Allocators) {
6430       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6431                                                         AllocatorData.first);
6432     }
6433   }
6434 };
6435 } // namespace
6436 
6437 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6438     const OMPExecutableDirective &D, StringRef ParentName,
6439     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6440     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6441   assert(!ParentName.empty() && "Invalid target region parent name!");
6442   HasEmittedTargetRegion = true;
6443   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6444   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6445     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6446       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6447       if (!D.AllocatorTraits)
6448         continue;
6449       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6450     }
6451   }
6452   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6453   CodeGen.setAction(UsesAllocatorAction);
6454   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6455                                    IsOffloadEntry, CodeGen);
6456 }
6457 
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  // Build and store the result of:
  //   __kmpc_init_allocator(<gtid>, /*memspace=*/nullptr, <ntraits>, <traits>)
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Number of traits: the constant length of the traits array.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  // Reinterpret the traits array address as void* for the runtime call.
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  // Convert the runtime's void* result to the allocator variable's type.
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6492 
6493 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6494                                              const Expr *Allocator) {
6495   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6496   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6497   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6498   llvm::Value *AllocatorVal =
6499       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6500   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6501                                           CGF.getContext().VoidPtrTy,
6502                                           Allocator->getExprLoc());
6503   (void)CGF.EmitRuntimeCall(
6504       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6505                                             OMPRTL___kmpc_destroy_allocator),
6506       {ThreadId, AllocatorVal});
6507 }
6508 
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the captured target region into a function with the name built
  // above.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // Host side: a unique constant global stands in as the region ID.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);

  // Add NumTeams and ThreadLimit attributes to the outlined GPU function
  int32_t DefaultValTeams = -1;
  getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
  if (DefaultValTeams > 0) {
    OutlinedFn->addFnAttr("omp_target_num_teams",
                          std::to_string(DefaultValTeams));
  }
  int32_t DefaultValThreads = -1;
  getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
  if (DefaultValThreads > 0) {
    OutlinedFn->addFnAttr("omp_target_thread_limit",
                          std::to_string(DefaultValThreads));
  }
}
6591 
6592 /// Checks if the expression is constant or does not have non-trivial function
6593 /// calls.
6594 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6595   // We can skip constant expressions.
6596   // We can skip expressions with trivial calls or simple expressions.
6597   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6598           !E->hasNonTrivialCall(Ctx)) &&
6599          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6600 }
6601 
// Peels compound statements (and other containers) off \p Body and returns
// the single "meaningful" statement inside, or nullptr when there is none or
// more than one. Trivial expressions, no-op statements and pure declarations
// are ignored while scanning.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      // Trivial expressions (constants, no non-trivial calls, no side
      // effects) can be skipped.
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations: a DeclStmt is ignorable only if every
      // declaration in it is ignorable.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              // Globals and never-used locals do not count as a child.
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Keep unwrapping: the single child may itself be a container.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6643 
/// Returns the 'num_teams' clause expression that applies to the target-based
/// directive \p D, or nullptr when the number of teams is known without
/// emitting an expression.  On return \p DefaultVal holds a constant default:
///   -1  no teams region needs to be emitted (plain 'target' with no nested
///       directive),
///    1  exactly one team is created,
///    0  no constant is known (left to the runtime).
/// \p DefaultVal is also set when the returned clause expression folds to an
/// integer constant.
const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    // A plain 'target' may contain a nested 'teams' directive; look through
    // the captured statement for its single meaningful child.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Record the constant value when the clause expression folds.
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(CGF.getContext()))
              DefaultVal = Constant->getExtValue();
          return NumTeams;
        }
        // Nested 'teams' without num_teams: the runtime picks the value.
        DefaultVal = 0;
        return nullptr;
      }
      // Any other nested directive (parallel, simd, ...) runs in one team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
        DefaultVal = 1;
        return nullptr;
      }
      DefaultVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region
    DefaultVal = -1;
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams form: the num_teams clause sits on \p D itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return NumTeams;
    }
    DefaultVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // Combined target+parallel/simd forms always run with a single team.
    DefaultVal = 1;
    return nullptr;
  // All remaining kinds are not target-based executable directives and are
  // rejected by the assertion above; listed to keep the switch exhaustive.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6772 
6773 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6774     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6775   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6776          "Clauses associated with the teams directive expected to be emitted "
6777          "only for the host!");
6778   CGBuilderTy &Bld = CGF.Builder;
6779   int32_t DefaultNT = -1;
6780   const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
6781   if (NumTeams != nullptr) {
6782     OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6783 
6784     switch (DirectiveKind) {
6785     case OMPD_target: {
6786       const auto *CS = D.getInnermostCapturedStmt();
6787       CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6788       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6789       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6790                                                   /*IgnoreResultAssign*/ true);
6791       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6792                              /*isSigned=*/true);
6793     }
6794     case OMPD_target_teams:
6795     case OMPD_target_teams_distribute:
6796     case OMPD_target_teams_distribute_simd:
6797     case OMPD_target_teams_distribute_parallel_for:
6798     case OMPD_target_teams_distribute_parallel_for_simd: {
6799       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6800       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6801                                                   /*IgnoreResultAssign*/ true);
6802       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6803                              /*isSigned=*/true);
6804     }
6805     default:
6806       break;
6807     }
6808   } else if (DefaultNT == -1) {
6809     return nullptr;
6810   }
6811 
6812   return Bld.getInt32(DefaultNT);
6813 }
6814 
/// Computes the number of threads for an OpenMP region that is the single
/// meaningful child of the captured statement \p CS.  When the child is a
/// parallel directive its if/num_threads clauses decide the value, clamped by
/// \p DefaultThreadLimitVal when that is non-null; a simd child yields 1.
/// Returns null if the child is not an executable directive of either kind
/// and no default limit was given; an i32 constant 0 means "let the runtime
/// decide".
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        // Evaluate clause expressions in the context of the captured stmt.
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Pick the if clause with no name modifier or the one naming
        // 'parallel'.
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Statically false condition: the region runs serially.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit the clause's pre-init declarations so the condition can
            // refer to them.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  // Allocate without initializing; cleanups still apply.
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Emit the clause's pre-init declarations first, as above.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp num_threads by the surrounding thread limit, if any:
        // min(DefaultThreadLimitVal, NumThreads) via unsigned compare.
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        // No num_threads clause: fall back to the limit, or 0 (runtime
        // default).
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A simd region executes with a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  // No nested directive found: use the limit, or 0 (runtime default).
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6906 
/// Returns the expression that bounds the number of threads for the
/// target-based directive \p D — the thread_limit clause expression, or the
/// num_threads expression when its constant value is smaller — or nullptr
/// when no such expression applies.  \p DefaultVal is updated when the chosen
/// expression folds to an integer constant.
const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  switch (DirectiveKind) {
  case OMPD_target:
    // The 'target' directive itself cannot take a thread_limit clause.
    return nullptr;
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return ThreadLimit;
    }
    return nullptr;
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Both thread_limit and num_threads may apply; prefer the constant
    // num_threads when it is smaller than the thread_limit constant.
    Expr *ThreadLimit = nullptr;
    Expr *NumThreads = nullptr;
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      ThreadLimit = ThreadLimitClause->getThreadLimit();
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      NumThreads = NumThreadsClause->getNumThreads();
      if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
        if (auto Constant =
                NumThreads->getIntegerConstantExpr(CGF.getContext())) {
          // NOTE(review): when no thread_limit clause was present this
          // compares against the caller-supplied initial DefaultVal — confirm
          // all callers initialize it to a meaningful value.
          if (Constant->getExtValue() < DefaultVal) {
            DefaultVal = Constant->getExtValue();
            ThreadLimit = NumThreads;
          }
        }
      }
    }
    return ThreadLimit;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // Simd regions execute with a single thread.
    DefaultVal = 1;
    return nullptr;
  // All remaining kinds are not target-based executable directives and are
  // rejected by the assertion above; listed to keep the switch exhaustive.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7028 
/// Emits the number of threads for the target-based directive \p D as an i32
/// value on the host.  An i32 constant 0 means "let the runtime decide"; an
/// i32 constant 1 forces serial execution of the region.
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Let a directly nested parallel/simd region dictate the count if it can.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // A nested directive may carry a thread_limit clause; evaluate it (with
      // its pre-init declarations) in the captured-statement context.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              // Allocate without initializing; cleanups still apply.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // Step inside a nested 'teams' (without distribute) to inspect its
      // body for the directive that actually determines the thread count.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // For a non-simd 'distribute', the parallel region nested inside it
      // decides the count (clamped by the thread limit found above).
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // A simd region runs with one thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    // thread_limit on the combined directive bounds any nested region.
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    // A nested plain 'distribute' may itself contain the deciding region.
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      // Pick the if clause with no name modifier or the one naming
      // 'parallel'.
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false condition: the region runs serially.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Effective limit is min(num_threads, thread_limit), computed with an
      // unsigned compare.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // Simd regions execute with a single thread.
    return Bld.getInt32(1);
  // All remaining kinds are not target-based executable directives and are
  // rejected by the assertion above; listed to keep the switch exhaustive.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7247 
7248 namespace {
7249 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7250 
7251 // Utility to handle information from clauses associated with a given
7252 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7253 // It provides a convenient interface to obtain the information and generate
7254 // code for that information.
7255 class MappableExprsHandler {
7256 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  /// NOTE(review): these values are presumably shared with the offloading
  /// runtime's map-type encoding — verify against the libomptarget flag
  /// definitions before changing any of them.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// Increment and decrement a separate reference counter so that the data
    /// cannot be unmapped within the associated region.  Thus, this flag is
    /// intended to be used on 'target' and 'target data' directives because
    /// they are inherently structured.  It is not intended to be used on
    /// 'target enter data' and 'target exit data' directives because they are
    /// inherently dynamic.
    /// This is an OpenMP extension for the sake of OpenACC support.
    OMP_MAP_OMPX_HOLD = 0x2000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7312 
7313   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7314   static unsigned getFlagMemberOffset() {
7315     unsigned Offset = 0;
7316     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7317          Remain = Remain >> 1)
7318       Offset++;
7319     return Offset;
7320   }
7321 
7322   /// Class that holds debugging information for a data mapping to be passed to
7323   /// the runtime library.
7324   class MappingExprInfo {
7325     /// The variable declaration used for the data mapping.
7326     const ValueDecl *MapDecl = nullptr;
7327     /// The original expression used in the map clause, or null if there is
7328     /// none.
7329     const Expr *MapExpr = nullptr;
7330 
7331   public:
7332     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7333         : MapDecl(MapDecl), MapExpr(MapExpr) {}
7334 
7335     const ValueDecl *getMapDecl() const { return MapDecl; }
7336     const Expr *getMapExpr() const { return MapExpr; }
7337   };
7338 
7339   /// Class that associates information with a base pointer to be passed to the
7340   /// runtime library.
7341   class BasePointerInfo {
7342     /// The base pointer.
7343     llvm::Value *Ptr = nullptr;
7344     /// The base declaration that refers to this device pointer, or null if
7345     /// there is none.
7346     const ValueDecl *DevPtrDecl = nullptr;
7347 
7348   public:
7349     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7350         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7351     llvm::Value *operator*() const { return Ptr; }
7352     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7353     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7354   };
7355 
  // Convenience array aliases used to collect per-mapping data (expressions,
  // base pointers, pointers, sizes, flags, mappers, dimensions) before it is
  // handed to the runtime.
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7363 
7364   /// This structure contains combined information generated for mappable
7365   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7366   /// mappers, and non-contiguous information.
7367   struct MapCombinedInfoTy {
7368     struct StructNonContiguousInfo {
7369       bool IsNonContiguous = false;
7370       MapDimArrayTy Dims;
7371       MapNonContiguousArrayTy Offsets;
7372       MapNonContiguousArrayTy Counts;
7373       MapNonContiguousArrayTy Strides;
7374     };
7375     MapExprsArrayTy Exprs;
7376     MapBaseValuesArrayTy BasePointers;
7377     MapValuesArrayTy Pointers;
7378     MapValuesArrayTy Sizes;
7379     MapFlagsArrayTy Types;
7380     MapMappersArrayTy Mappers;
7381     StructNonContiguousInfo NonContigInfo;
7382 
7383     /// Append arrays in \a CurInfo.
7384     void append(MapCombinedInfoTy &CurInfo) {
7385       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7386       BasePointers.append(CurInfo.BasePointers.begin(),
7387                           CurInfo.BasePointers.end());
7388       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7389       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7390       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7391       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7392       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7393                                  CurInfo.NonContigInfo.Dims.end());
7394       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7395                                     CurInfo.NonContigInfo.Offsets.end());
7396       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7397                                    CurInfo.NonContigInfo.Counts.end());
7398       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7399                                     CurInfo.NonContigInfo.Strides.end());
7400     }
7401   };
7402 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Map info generated while visiting the struct's members, kept so it can
    /// be emitted together with the combined entry for the whole struct.
    MapCombinedInfoTy PreliminaryMapData;
    /// Field index and address of the lowest mapped element of the struct.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Field index and address of the highest mapped element of the struct.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Address of the struct itself.
    Address Base = Address::invalid();
    /// Lower bound address used for the combined entry.
    Address LB = Address::invalid();
    /// True if the lowest element was reached through an array section.
    bool IsArraySection = false;
    /// True if the whole record (not just a subset of fields) is mapped.
    bool HasCompleteRecord = false;
  };
7418 
7419 private:
  /// Mapping information for a single mappable-expression component list:
  /// the components themselves, the map type and its modifiers, and flags
  /// describing how the entry must be emitted.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    // True if a device pointer must be returned for this entry
    // (use_device_ptr/use_device_addr handling).
    bool ReturnDevicePointer = false;
    // True if the map was generated implicitly rather than written by the
    // user.
    bool IsImplicit = false;
    // User-defined mapper associated with this entry, if any.
    const ValueDecl *Mapper = nullptr;
    // Original map expression, used for diagnostics/debug information.
    const Expr *VarRef = nullptr;
    // True if this entry comes from a use_device_addr clause.
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
7446 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// The expression denoting the struct member.
    const Expr *IE = nullptr;
    /// The declaration the clause refers to.
    const ValueDecl *VD = nullptr;
    /// True for use_device_addr, false for use_device_ptr.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7459 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
7480 
  /// Emit IR computing the size in bytes of the storage denoted by \a E.
  /// Handles array shaping expressions, array sections (including sections
  /// with implicit length), and plain expressions; references are sized by
  /// their pointee type.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression:
    // sizeof(pointee) * dim0 * dim1 * ... (each dimension converted to the
    // size type before the multiply).
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base, e.g. a[:].
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      // Determine the element size from the base, which is either a pointer
      // or an array type.
      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        // NOTE(review): this assert is dead code — cast<> never returns null
        // (it asserts internally on a type mismatch).
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element, e.g. a[3].
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      // Explicit length: size = length * elemsize (length converted to the
      // size type first).
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      // Only remaining form: lower bound with no length, e.g. a[lb:].
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Clamp to zero if lb*elemsize exceeds the base size, to avoid an
      // unsigned wrap in the subtraction.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
7555 
7556   /// Return the corresponding bits for a given map clause modifier. Add
7557   /// a flag marking the map as a pointer if requested. Add a flag marking the
7558   /// map as the first one of a series of maps that relate to the same map
7559   /// expression.
7560   OpenMPOffloadMappingFlags getMapTypeBits(
7561       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7562       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7563       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7564     OpenMPOffloadMappingFlags Bits =
7565         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7566     switch (MapType) {
7567     case OMPC_MAP_alloc:
7568     case OMPC_MAP_release:
7569       // alloc and release is the default behavior in the runtime library,  i.e.
7570       // if we don't pass any bits alloc/release that is what the runtime is
7571       // going to do. Therefore, we don't need to signal anything for these two
7572       // type modifiers.
7573       break;
7574     case OMPC_MAP_to:
7575       Bits |= OMP_MAP_TO;
7576       break;
7577     case OMPC_MAP_from:
7578       Bits |= OMP_MAP_FROM;
7579       break;
7580     case OMPC_MAP_tofrom:
7581       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7582       break;
7583     case OMPC_MAP_delete:
7584       Bits |= OMP_MAP_DELETE;
7585       break;
7586     case OMPC_MAP_unknown:
7587       llvm_unreachable("Unexpected map type!");
7588     }
7589     if (AddPtrFlag)
7590       Bits |= OMP_MAP_PTR_AND_OBJ;
7591     if (AddIsTargetParamFlag)
7592       Bits |= OMP_MAP_TARGET_PARAM;
7593     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7594         != MapModifiers.end())
7595       Bits |= OMP_MAP_ALWAYS;
7596     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7597         != MapModifiers.end())
7598       Bits |= OMP_MAP_CLOSE;
7599     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) !=
7600             MapModifiers.end() ||
7601         llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) !=
7602             MotionModifiers.end())
7603       Bits |= OMP_MAP_PRESENT;
7604     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold) !=
7605         MapModifiers.end())
7606       Bits |= OMP_MAP_OMPX_HOLD;
7607     if (IsNonContiguous)
7608       Bits |= OMP_MAP_NON_CONTIG;
7609     return Bits;
7610   }
7611 
  /// Return true if the provided expression is a final array section. A
  /// final array section, is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's the user's fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have a size greater than 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }
7650 
7651   /// Generate the base pointers, section pointers, sizes, map type bits, and
7652   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7653   /// map type, map or motion modifiers, and expression components.
7654   /// \a IsFirstComponent should be set to true if the provided set of
7655   /// components is the first associated with a capture.
7656   void generateInfoForComponentList(
7657       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7658       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7659       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7660       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7661       bool IsFirstComponentList, bool IsImplicit,
7662       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7663       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7664       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7665           OverlappedElements = llvm::None) const {
7666     // The following summarizes what has to be generated for each map and the
7667     // types below. The generated information is expressed in this order:
7668     // base pointer, section pointer, size, flags
7669     // (to add to the ones that come from the map type and modifier).
7670     //
7671     // double d;
7672     // int i[100];
7673     // float *p;
7674     //
7675     // struct S1 {
7676     //   int i;
7677     //   float f[50];
7678     // }
7679     // struct S2 {
7680     //   int i;
7681     //   float f[50];
7682     //   S1 s;
7683     //   double *p;
7684     //   struct S2 *ps;
7685     //   int &ref;
7686     // }
7687     // S2 s;
7688     // S2 *ps;
7689     //
7690     // map(d)
7691     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7692     //
7693     // map(i)
7694     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7695     //
7696     // map(i[1:23])
7697     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7698     //
7699     // map(p)
7700     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7701     //
7702     // map(p[1:24])
7703     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7704     // in unified shared memory mode or for local pointers
7705     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7706     //
7707     // map(s)
7708     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7709     //
7710     // map(s.i)
7711     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7712     //
7713     // map(s.s.f)
7714     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7715     //
7716     // map(s.p)
7717     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7718     //
7719     // map(to: s.p[:22])
7720     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7721     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7722     // &(s.p), &(s.p[0]), 22*sizeof(double),
7723     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7724     // (*) alloc space for struct members, only this is a target parameter
7725     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7726     //      optimizes this entry out, same in the examples below)
7727     // (***) map the pointee (map: to)
7728     //
7729     // map(to: s.ref)
7730     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7731     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7732     // (*) alloc space for struct members, only this is a target parameter
7733     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7734     //      optimizes this entry out, same in the examples below)
7735     // (***) map the pointee (map: to)
7736     //
7737     // map(s.ps)
7738     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7739     //
7740     // map(from: s.ps->s.i)
7741     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7742     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7743     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7744     //
7745     // map(to: s.ps->ps)
7746     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7747     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7748     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7749     //
7750     // map(s.ps->ps->ps)
7751     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7752     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7753     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7754     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7755     //
7756     // map(to: s.ps->ps->s.f[:22])
7757     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7758     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7759     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7760     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7761     //
7762     // map(ps)
7763     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7764     //
7765     // map(ps->i)
7766     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7767     //
7768     // map(ps->s.f)
7769     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7770     //
7771     // map(from: ps->p)
7772     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7773     //
7774     // map(to: ps->p[:22])
7775     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7776     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7777     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7778     //
7779     // map(ps->ps)
7780     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7781     //
7782     // map(from: ps->ps->s.i)
7783     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7784     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7785     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7786     //
7787     // map(from: ps->ps->ps)
7788     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7789     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7790     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7791     //
7792     // map(ps->ps->ps->ps)
7793     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7794     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7795     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7796     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7797     //
7798     // map(to: ps->ps->ps->s.f[:22])
7799     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7800     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7801     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7802     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7803     //
7804     // map(to: s.f[:22]) map(from: s.p[:33])
7805     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7806     //     sizeof(double*) (**), TARGET_PARAM
7807     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7808     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7809     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7810     // (*) allocate contiguous space needed to fit all mapped members even if
7811     //     we allocate space for members not mapped (in this example,
7812     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7813     //     them as well because they fall between &s.f[0] and &s.p)
7814     //
7815     // map(from: s.f[:22]) map(to: ps->p[:33])
7816     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7817     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7818     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7819     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7820     // (*) the struct this entry pertains to is the 2nd element in the list of
7821     //     arguments, hence MEMBER_OF(2)
7822     //
7823     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7824     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7825     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7826     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7827     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7828     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7829     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7830     // (*) the struct this entry pertains to is the 4th element in the list
7831     //     of arguments, hence MEMBER_OF(4)
7832 
7833     // Track if the map information being generated is the first for a capture.
7834     bool IsCaptureFirstInfo = IsFirstComponentList;
7835     // When the variable is on a declare target link or in a to clause with
7836     // unified memory, a reference is needed to hold the host/device address
7837     // of the variable.
7838     bool RequiresReference = false;
7839 
7840     // Scan the components from the base to the complete expression.
7841     auto CI = Components.rbegin();
7842     auto CE = Components.rend();
7843     auto I = CI;
7844 
7845     // Track if the map information being generated is the first for a list of
7846     // components.
7847     bool IsExpressionFirstInfo = true;
7848     bool FirstPointerInComplexData = false;
7849     Address BP = Address::invalid();
7850     const Expr *AssocExpr = I->getAssociatedExpression();
7851     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7852     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7853     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7854 
7855     if (isa<MemberExpr>(AssocExpr)) {
7856       // The base is the 'this' pointer. The content of the pointer is going
7857       // to be the base of the field being mapped.
7858       BP = CGF.LoadCXXThisAddress();
7859     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7860                (OASE &&
7861                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7862       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7863     } else if (OAShE &&
7864                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7865       BP = Address(
7866           CGF.EmitScalarExpr(OAShE->getBase()),
7867           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7868     } else {
7869       // The base is the reference to the variable.
7870       // BP = &Var.
7871       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7872       if (const auto *VD =
7873               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7874         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7875                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7876           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7877               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7878                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7879             RequiresReference = true;
7880             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7881           }
7882         }
7883       }
7884 
7885       // If the variable is a pointer and is being dereferenced (i.e. is not
7886       // the last component), the base has to be the pointer itself, not its
7887       // reference. References are ignored for mapping purposes.
7888       QualType Ty =
7889           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7890       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7891         // No need to generate individual map information for the pointer, it
7892         // can be associated with the combined storage if shared memory mode is
7893         // active or the base declaration is not global variable.
7894         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7895         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7896             !VD || VD->hasLocalStorage())
7897           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7898         else
7899           FirstPointerInComplexData = true;
7900         ++I;
7901       }
7902     }
7903 
7904     // Track whether a component of the list should be marked as MEMBER_OF some
7905     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7906     // in a component list should be marked as MEMBER_OF, all subsequent entries
7907     // do not belong to the base struct. E.g.
7908     // struct S2 s;
7909     // s.ps->ps->ps->f[:]
7910     //   (1) (2) (3) (4)
7911     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7912     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7913     // is the pointee of ps(2) which is not member of struct s, so it should not
7914     // be marked as such (it is still PTR_AND_OBJ).
7915     // The variable is initialized to false so that PTR_AND_OBJ entries which
7916     // are not struct members are not considered (e.g. array of pointers to
7917     // data).
7918     bool ShouldBeMemberOf = false;
7919 
7920     // Variable keeping track of whether or not we have encountered a component
7921     // in the component list which is a member expression. Useful when we have a
7922     // pointer or a final array section, in which case it is the previous
7923     // component in the list which tells us whether we have a member expression.
7924     // E.g. X.f[:]
7925     // While processing the final array section "[:]" it is "f" which tells us
7926     // whether we are dealing with a member of a declared struct.
7927     const MemberExpr *EncounteredME = nullptr;
7928 
7929     // Track for the total number of dimension. Start from one for the dummy
7930     // dimension.
7931     uint64_t DimSize = 1;
7932 
7933     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7934     bool IsPrevMemberReference = false;
7935 
7936     for (; I != CE; ++I) {
7937       // If the current component is member of a struct (parent struct) mark it.
7938       if (!EncounteredME) {
7939         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7940         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7941         // as MEMBER_OF the parent struct.
7942         if (EncounteredME) {
7943           ShouldBeMemberOf = true;
7944           // Do not emit as complex pointer if this is actually not array-like
7945           // expression.
7946           if (FirstPointerInComplexData) {
7947             QualType Ty = std::prev(I)
7948                               ->getAssociatedDeclaration()
7949                               ->getType()
7950                               .getNonReferenceType();
7951             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7952             FirstPointerInComplexData = false;
7953           }
7954         }
7955       }
7956 
7957       auto Next = std::next(I);
7958 
7959       // We need to generate the addresses and sizes if this is the last
7960       // component, if the component is a pointer or if it is an array section
7961       // whose length can't be proved to be one. If this is a pointer, it
7962       // becomes the base address for the following components.
7963 
7964       // A final array section, is one whose length can't be proved to be one.
7965       // If the map item is non-contiguous then we don't treat any array section
7966       // as final array section.
7967       bool IsFinalArraySection =
7968           !IsNonContiguous &&
7969           isFinalArraySectionExpression(I->getAssociatedExpression());
7970 
7971       // If we have a declaration for the mapping use that, otherwise use
7972       // the base declaration of the map clause.
7973       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7974                                      ? I->getAssociatedDeclaration()
7975                                      : BaseDecl;
7976       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7977                                                : MapExpr;
7978 
7979       // Get information on whether the element is a pointer. Have to do a
7980       // special treatment for array sections given that they are built-in
7981       // types.
7982       const auto *OASE =
7983           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7984       const auto *OAShE =
7985           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7986       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7987       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7988       bool IsPointer =
7989           OAShE ||
7990           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7991                        .getCanonicalType()
7992                        ->isAnyPointerType()) ||
7993           I->getAssociatedExpression()->getType()->isAnyPointerType();
7994       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7995                                MapDecl &&
7996                                MapDecl->getType()->isLValueReferenceType();
7997       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
7998 
7999       if (OASE)
8000         ++DimSize;
8001 
8002       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
8003           IsFinalArraySection) {
8004         // If this is not the last component, we expect the pointer to be
8005         // associated with an array expression or member expression.
8006         assert((Next == CE ||
8007                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
8008                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
8009                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
8010                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
8011                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
8012                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
8013                "Unexpected expression");
8014 
8015         Address LB = Address::invalid();
8016         Address LowestElem = Address::invalid();
8017         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
8018                                        const MemberExpr *E) {
8019           const Expr *BaseExpr = E->getBase();
8020           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
8021           // scalar.
8022           LValue BaseLV;
8023           if (E->isArrow()) {
8024             LValueBaseInfo BaseInfo;
8025             TBAAAccessInfo TBAAInfo;
8026             Address Addr =
8027                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
8028             QualType PtrTy = BaseExpr->getType()->getPointeeType();
8029             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
8030           } else {
8031             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
8032           }
8033           return BaseLV;
8034         };
8035         if (OAShE) {
8036           LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
8037                                     CGF.getContext().getTypeAlignInChars(
8038                                         OAShE->getBase()->getType()));
8039         } else if (IsMemberReference) {
8040           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
8041           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8042           LowestElem = CGF.EmitLValueForFieldInitialization(
8043                               BaseLVal, cast<FieldDecl>(MapDecl))
8044                            .getAddress(CGF);
8045           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
8046                    .getAddress(CGF);
8047         } else {
8048           LowestElem = LB =
8049               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
8050                   .getAddress(CGF);
8051         }
8052 
8053         // If this component is a pointer inside the base struct then we don't
8054         // need to create any entry for it - it will be combined with the object
8055         // it is pointing to into a single PTR_AND_OBJ entry.
8056         bool IsMemberPointerOrAddr =
8057             EncounteredME &&
8058             (((IsPointer || ForDeviceAddr) &&
8059               I->getAssociatedExpression() == EncounteredME) ||
8060              (IsPrevMemberReference && !IsPointer) ||
8061              (IsMemberReference && Next != CE &&
8062               !Next->getAssociatedExpression()->getType()->isPointerType()));
8063         if (!OverlappedElements.empty() && Next == CE) {
8064           // Handle base element with the info for overlapped elements.
8065           assert(!PartialStruct.Base.isValid() && "The base element is set.");
8066           assert(!IsPointer &&
8067                  "Unexpected base element with the pointer type.");
8068           // Mark the whole struct as the struct that requires allocation on the
8069           // device.
8070           PartialStruct.LowestElem = {0, LowestElem};
8071           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8072               I->getAssociatedExpression()->getType());
8073           Address HB = CGF.Builder.CreateConstGEP(
8074               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem,
8075                                                               CGF.VoidPtrTy),
8076               TypeSize.getQuantity() - 1);
8077           PartialStruct.HighestElem = {
8078               std::numeric_limits<decltype(
8079                   PartialStruct.HighestElem.first)>::max(),
8080               HB};
8081           PartialStruct.Base = BP;
8082           PartialStruct.LB = LB;
8083           assert(
8084               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8085               "Overlapped elements must be used only once for the variable.");
8086           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8087           // Emit data for non-overlapped data.
8088           OpenMPOffloadMappingFlags Flags =
8089               OMP_MAP_MEMBER_OF |
8090               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8091                              /*AddPtrFlag=*/false,
8092                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8093           llvm::Value *Size = nullptr;
8094           // Do bitcopy of all non-overlapped structure elements.
8095           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8096                    Component : OverlappedElements) {
8097             Address ComponentLB = Address::invalid();
8098             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8099                  Component) {
8100               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8101                 const auto *FD = dyn_cast<FieldDecl>(VD);
8102                 if (FD && FD->getType()->isLValueReferenceType()) {
8103                   const auto *ME =
8104                       cast<MemberExpr>(MC.getAssociatedExpression());
8105                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8106                   ComponentLB =
8107                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
8108                           .getAddress(CGF);
8109                 } else {
8110                   ComponentLB =
8111                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
8112                           .getAddress(CGF);
8113                 }
8114                 Size = CGF.Builder.CreatePtrDiff(
8115                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
8116                     CGF.EmitCastToVoidPtr(LB.getPointer()));
8117                 break;
8118               }
8119             }
8120             assert(Size && "Failed to determine structure size");
8121             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8122             CombinedInfo.BasePointers.push_back(BP.getPointer());
8123             CombinedInfo.Pointers.push_back(LB.getPointer());
8124             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8125                 Size, CGF.Int64Ty, /*isSigned=*/true));
8126             CombinedInfo.Types.push_back(Flags);
8127             CombinedInfo.Mappers.push_back(nullptr);
8128             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8129                                                                       : 1);
8130             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
8131           }
8132           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8133           CombinedInfo.BasePointers.push_back(BP.getPointer());
8134           CombinedInfo.Pointers.push_back(LB.getPointer());
8135           Size = CGF.Builder.CreatePtrDiff(
8136               CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
8137               CGF.EmitCastToVoidPtr(LB.getPointer()));
8138           CombinedInfo.Sizes.push_back(
8139               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8140           CombinedInfo.Types.push_back(Flags);
8141           CombinedInfo.Mappers.push_back(nullptr);
8142           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8143                                                                     : 1);
8144           break;
8145         }
8146         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8147         if (!IsMemberPointerOrAddr ||
8148             (Next == CE && MapType != OMPC_MAP_unknown)) {
8149           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8150           CombinedInfo.BasePointers.push_back(BP.getPointer());
8151           CombinedInfo.Pointers.push_back(LB.getPointer());
8152           CombinedInfo.Sizes.push_back(
8153               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8154           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8155                                                                     : 1);
8156 
8157           // If Mapper is valid, the last component inherits the mapper.
8158           bool HasMapper = Mapper && Next == CE;
8159           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8160 
8161           // We need to add a pointer flag for each map that comes from the
8162           // same expression except for the first one. We also need to signal
8163           // this map is the first one that relates with the current capture
8164           // (there is a set of entries for each capture).
8165           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8166               MapType, MapModifiers, MotionModifiers, IsImplicit,
8167               !IsExpressionFirstInfo || RequiresReference ||
8168                   FirstPointerInComplexData || IsMemberReference,
8169               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8170 
8171           if (!IsExpressionFirstInfo || IsMemberReference) {
8172             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8173             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8174             if (IsPointer || (IsMemberReference && Next != CE))
8175               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
8176                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
8177 
8178             if (ShouldBeMemberOf) {
8179               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8180               // should be later updated with the correct value of MEMBER_OF.
8181               Flags |= OMP_MAP_MEMBER_OF;
8182               // From now on, all subsequent PTR_AND_OBJ entries should not be
8183               // marked as MEMBER_OF.
8184               ShouldBeMemberOf = false;
8185             }
8186           }
8187 
8188           CombinedInfo.Types.push_back(Flags);
8189         }
8190 
8191         // If we have encountered a member expression so far, keep track of the
8192         // mapped member. If the parent is "*this", then the value declaration
8193         // is nullptr.
8194         if (EncounteredME) {
8195           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8196           unsigned FieldIndex = FD->getFieldIndex();
8197 
8198           // Update info about the lowest and highest elements for this struct
8199           if (!PartialStruct.Base.isValid()) {
8200             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8201             if (IsFinalArraySection) {
8202               Address HB =
8203                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
8204                       .getAddress(CGF);
8205               PartialStruct.HighestElem = {FieldIndex, HB};
8206             } else {
8207               PartialStruct.HighestElem = {FieldIndex, LowestElem};
8208             }
8209             PartialStruct.Base = BP;
8210             PartialStruct.LB = BP;
8211           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8212             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8213           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8214             PartialStruct.HighestElem = {FieldIndex, LowestElem};
8215           }
8216         }
8217 
8218         // Need to emit combined struct for array sections.
8219         if (IsFinalArraySection || IsNonContiguous)
8220           PartialStruct.IsArraySection = true;
8221 
8222         // If we have a final array section, we are done with this expression.
8223         if (IsFinalArraySection)
8224           break;
8225 
8226         // The pointer becomes the base for the next element.
8227         if (Next != CE)
8228           BP = IsMemberReference ? LowestElem : LB;
8229 
8230         IsExpressionFirstInfo = false;
8231         IsCaptureFirstInfo = false;
8232         FirstPointerInComplexData = false;
8233         IsPrevMemberReference = IsMemberReference;
8234       } else if (FirstPointerInComplexData) {
8235         QualType Ty = Components.rbegin()
8236                           ->getAssociatedDeclaration()
8237                           ->getType()
8238                           .getNonReferenceType();
8239         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8240         FirstPointerInComplexData = false;
8241       }
8242     }
8243     // If ran into the whole component - allocate the space for the whole
8244     // record.
8245     if (!EncounteredME)
8246       PartialStruct.HasCompleteRecord = true;
8247 
8248     if (!IsNonContiguous)
8249       return;
8250 
8251     const ASTContext &Context = CGF.getContext();
8252 
8253     // For supporting stride in array section, we need to initialize the first
8254     // dimension size as 1, first offset as 0, and first count as 1
8255     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8256     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8257     MapValuesArrayTy CurStrides;
8258     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8259     uint64_t ElementTypeSize;
8260 
8261     // Collect Size information for each dimension and get the element size as
8262     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
8264     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8265          Components) {
8266       const Expr *AssocExpr = Component.getAssociatedExpression();
8267       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8268 
8269       if (!OASE)
8270         continue;
8271 
8272       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8273       auto *CAT = Context.getAsConstantArrayType(Ty);
8274       auto *VAT = Context.getAsVariableArrayType(Ty);
8275 
8276       // We need all the dimension size except for the last dimension.
8277       assert((VAT || CAT || &Component == &*Components.begin()) &&
8278              "Should be either ConstantArray or VariableArray if not the "
8279              "first Component");
8280 
8281       // Get element size if CurStrides is empty.
8282       if (CurStrides.empty()) {
8283         const Type *ElementType = nullptr;
8284         if (CAT)
8285           ElementType = CAT->getElementType().getTypePtr();
8286         else if (VAT)
8287           ElementType = VAT->getElementType().getTypePtr();
8288         else
8289           assert(&Component == &*Components.begin() &&
8290                  "Only expect pointer (non CAT or VAT) when this is the "
8291                  "first Component");
8292         // If ElementType is null, then it means the base is a pointer
8293         // (neither CAT nor VAT) and we'll attempt to get ElementType again
8294         // for next iteration.
8295         if (ElementType) {
8296           // For the case that having pointer as base, we need to remove one
8297           // level of indirection.
8298           if (&Component != &*Components.begin())
8299             ElementType = ElementType->getPointeeOrArrayElementType();
8300           ElementTypeSize =
8301               Context.getTypeSizeInChars(ElementType).getQuantity();
8302           CurStrides.push_back(
8303               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8304         }
8305       }
8306       // Get dimension value except for the last dimension since we don't need
8307       // it.
8308       if (DimSizes.size() < Components.size() - 1) {
8309         if (CAT)
8310           DimSizes.push_back(llvm::ConstantInt::get(
8311               CGF.Int64Ty, CAT->getSize().getZExtValue()));
8312         else if (VAT)
8313           DimSizes.push_back(CGF.Builder.CreateIntCast(
8314               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8315               /*IsSigned=*/false));
8316       }
8317     }
8318 
    // Skip the dummy dimension since we already have its information.
8320     auto DI = DimSizes.begin() + 1;
8321     // Product of dimension.
8322     llvm::Value *DimProd =
8323         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8324 
8325     // Collect info for non-contiguous. Notice that offset, count, and stride
8326     // are only meaningful for array-section, so we insert a null for anything
8327     // other than array-section.
8328     // Also, the size of offset, count, and stride are not the same as
8329     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8330     // count, and stride are the same as the number of non-contiguous
8331     // declaration in target update to/from clause.
8332     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8333          Components) {
8334       const Expr *AssocExpr = Component.getAssociatedExpression();
8335 
8336       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8337         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8338             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8339             /*isSigned=*/false);
8340         CurOffsets.push_back(Offset);
8341         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8342         CurStrides.push_back(CurStrides.back());
8343         continue;
8344       }
8345 
8346       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8347 
8348       if (!OASE)
8349         continue;
8350 
8351       // Offset
8352       const Expr *OffsetExpr = OASE->getLowerBound();
8353       llvm::Value *Offset = nullptr;
8354       if (!OffsetExpr) {
8355         // If offset is absent, then we just set it to zero.
8356         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8357       } else {
8358         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8359                                            CGF.Int64Ty,
8360                                            /*isSigned=*/false);
8361       }
8362       CurOffsets.push_back(Offset);
8363 
8364       // Count
8365       const Expr *CountExpr = OASE->getLength();
8366       llvm::Value *Count = nullptr;
8367       if (!CountExpr) {
8368         // In Clang, once a high dimension is an array section, we construct all
8369         // the lower dimension as array section, however, for case like
8370         // arr[0:2][2], Clang construct the inner dimension as an array section
8371         // but it actually is not in an array section form according to spec.
8372         if (!OASE->getColonLocFirst().isValid() &&
8373             !OASE->getColonLocSecond().isValid()) {
8374           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8375         } else {
8376           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8377           // When the length is absent it defaults to ⌈(size −
8378           // lower-bound)/stride⌉, where size is the size of the array
8379           // dimension.
8380           const Expr *StrideExpr = OASE->getStride();
8381           llvm::Value *Stride =
8382               StrideExpr
8383                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8384                                               CGF.Int64Ty, /*isSigned=*/false)
8385                   : nullptr;
8386           if (Stride)
8387             Count = CGF.Builder.CreateUDiv(
8388                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8389           else
8390             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8391         }
8392       } else {
8393         Count = CGF.EmitScalarExpr(CountExpr);
8394       }
8395       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8396       CurCounts.push_back(Count);
8397 
8398       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8399       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8400       //              Offset      Count     Stride
8401       //    D0          0           1         4    (int)    <- dummy dimension
8402       //    D1          0           2         8    (2 * (1) * 4)
8403       //    D2          1           2         20   (1 * (1 * 5) * 4)
8404       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8405       const Expr *StrideExpr = OASE->getStride();
8406       llvm::Value *Stride =
8407           StrideExpr
8408               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8409                                           CGF.Int64Ty, /*isSigned=*/false)
8410               : nullptr;
8411       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8412       if (Stride)
8413         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8414       else
8415         CurStrides.push_back(DimProd);
8416       if (DI != DimSizes.end())
8417         ++DI;
8418     }
8419 
8420     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8421     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8422     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8423   }
8424 
8425   /// Return the adjusted map modifiers if the declaration a capture refers to
8426   /// appears in a first-private clause. This is expected to be used only with
8427   /// directives that start with 'target'.
8428   MappableExprsHandler::OpenMPOffloadMappingFlags
8429   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8430     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8431 
8432     // A first private variable captured by reference will use only the
8433     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8434     // declaration is known as first-private in this handler.
8435     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8436       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8437         return MappableExprsHandler::OMP_MAP_TO |
8438                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8439       return MappableExprsHandler::OMP_MAP_PRIVATE |
8440              MappableExprsHandler::OMP_MAP_TO;
8441     }
8442     return MappableExprsHandler::OMP_MAP_TO |
8443            MappableExprsHandler::OMP_MAP_FROM;
8444   }
8445 
8446   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8447     // Rotate by getFlagMemberOffset() bits.
8448     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8449                                                   << getFlagMemberOffset());
8450   }
8451 
8452   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8453                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8454     // If the entry is PTR_AND_OBJ but has not been marked with the special
8455     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8456     // marked as MEMBER_OF.
8457     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8458         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8459       return;
8460 
8461     // Reset the placeholder value to prepare the flag for the assignment of the
8462     // proper MEMBER_OF value.
8463     Flags &= ~OMP_MAP_MEMBER_OF;
8464     Flags |= MemberOfFlag;
8465   }
8466 
8467   void getPlainLayout(const CXXRecordDecl *RD,
8468                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8469                       bool AsBase) const {
8470     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8471 
8472     llvm::StructType *St =
8473         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8474 
8475     unsigned NumElements = St->getNumElements();
8476     llvm::SmallVector<
8477         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8478         RecordLayout(NumElements);
8479 
8480     // Fill bases.
8481     for (const auto &I : RD->bases()) {
8482       if (I.isVirtual())
8483         continue;
8484       const auto *Base = I.getType()->getAsCXXRecordDecl();
8485       // Ignore empty bases.
8486       if (Base->isEmpty() || CGF.getContext()
8487                                  .getASTRecordLayout(Base)
8488                                  .getNonVirtualSize()
8489                                  .isZero())
8490         continue;
8491 
8492       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8493       RecordLayout[FieldIndex] = Base;
8494     }
8495     // Fill in virtual bases.
8496     for (const auto &I : RD->vbases()) {
8497       const auto *Base = I.getType()->getAsCXXRecordDecl();
8498       // Ignore empty bases.
8499       if (Base->isEmpty())
8500         continue;
8501       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8502       if (RecordLayout[FieldIndex])
8503         continue;
8504       RecordLayout[FieldIndex] = Base;
8505     }
8506     // Fill in all the fields.
8507     assert(!RD->isUnion() && "Unexpected union.");
8508     for (const auto *Field : RD->fields()) {
8509       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8510       // will fill in later.)
8511       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8512         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8513         RecordLayout[FieldIndex] = Field;
8514       }
8515     }
8516     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8517              &Data : RecordLayout) {
8518       if (Data.isNull())
8519         continue;
8520       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8521         getPlainLayout(Base, Layout, /*AsBase=*/true);
8522       else
8523         Layout.push_back(Data.get<const FieldDecl *>());
8524     }
8525   }
8526 
8527   /// Generate all the base pointers, section pointers, sizes, map types, and
8528   /// mappers for the extracted mappable expressions (all included in \a
8529   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8530   /// pair of the relevant declaration and index where it occurs is appended to
8531   /// the device pointers info array.
8532   void generateAllInfoForClauses(
8533       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8534       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8535           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8536     // We have to process the component lists that relate with the same
8537     // declaration in a single chunk so that we can generate the map flags
8538     // correctly. Therefore, we organize all lists in a map.
8539     enum MapKind { Present, Allocs, Other, Total };
8540     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8541                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8542         Info;
8543 
8544     // Helper function to fill the information map for the different supported
8545     // clauses.
8546     auto &&InfoGen =
8547         [&Info, &SkipVarSet](
8548             const ValueDecl *D, MapKind Kind,
8549             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8550             OpenMPMapClauseKind MapType,
8551             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8552             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8553             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8554             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8555           if (SkipVarSet.contains(D))
8556             return;
8557           auto It = Info.find(D);
8558           if (It == Info.end())
8559             It = Info
8560                      .insert(std::make_pair(
8561                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8562                      .first;
8563           It->second[Kind].emplace_back(
8564               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8565               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8566         };
8567 
8568     for (const auto *Cl : Clauses) {
8569       const auto *C = dyn_cast<OMPMapClause>(Cl);
8570       if (!C)
8571         continue;
8572       MapKind Kind = Other;
8573       if (!C->getMapTypeModifiers().empty() &&
8574           llvm::any_of(C->getMapTypeModifiers(), [](OpenMPMapModifierKind K) {
8575             return K == OMPC_MAP_MODIFIER_present;
8576           }))
8577         Kind = Present;
8578       else if (C->getMapType() == OMPC_MAP_alloc)
8579         Kind = Allocs;
8580       const auto *EI = C->getVarRefs().begin();
8581       for (const auto L : C->component_lists()) {
8582         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8583         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8584                 C->getMapTypeModifiers(), llvm::None,
8585                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8586                 E);
8587         ++EI;
8588       }
8589     }
8590     for (const auto *Cl : Clauses) {
8591       const auto *C = dyn_cast<OMPToClause>(Cl);
8592       if (!C)
8593         continue;
8594       MapKind Kind = Other;
8595       if (!C->getMotionModifiers().empty() &&
8596           llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
8597             return K == OMPC_MOTION_MODIFIER_present;
8598           }))
8599         Kind = Present;
8600       const auto *EI = C->getVarRefs().begin();
8601       for (const auto L : C->component_lists()) {
8602         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8603                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8604                 C->isImplicit(), std::get<2>(L), *EI);
8605         ++EI;
8606       }
8607     }
8608     for (const auto *Cl : Clauses) {
8609       const auto *C = dyn_cast<OMPFromClause>(Cl);
8610       if (!C)
8611         continue;
8612       MapKind Kind = Other;
8613       if (!C->getMotionModifiers().empty() &&
8614           llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
8615             return K == OMPC_MOTION_MODIFIER_present;
8616           }))
8617         Kind = Present;
8618       const auto *EI = C->getVarRefs().begin();
8619       for (const auto L : C->component_lists()) {
8620         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8621                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8622                 C->isImplicit(), std::get<2>(L), *EI);
8623         ++EI;
8624       }
8625     }
8626 
8627     // Look at the use_device_ptr clause information and mark the existing map
8628     // entries as such. If there is no map information for an entry in the
8629     // use_device_ptr list, we create one with map type 'alloc' and zero size
8630     // section. It is the user fault if that was not mapped before. If there is
8631     // no map information and the pointer is a struct member, then we defer the
8632     // emission of that entry until the whole struct has been processed.
8633     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8634                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8635         DeferredInfo;
8636     MapCombinedInfoTy UseDevicePtrCombinedInfo;
8637 
8638     for (const auto *Cl : Clauses) {
8639       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8640       if (!C)
8641         continue;
8642       for (const auto L : C->component_lists()) {
8643         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8644             std::get<1>(L);
8645         assert(!Components.empty() &&
8646                "Not expecting empty list of components!");
8647         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8648         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8649         const Expr *IE = Components.back().getAssociatedExpression();
8650         // If the first component is a member expression, we have to look into
8651         // 'this', which maps to null in the map of map information. Otherwise
8652         // look directly for the information.
8653         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8654 
8655         // We potentially have map information for this declaration already.
8656         // Look for the first set of components that refer to it.
8657         if (It != Info.end()) {
8658           bool Found = false;
8659           for (auto &Data : It->second) {
8660             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8661               return MI.Components.back().getAssociatedDeclaration() == VD;
8662             });
8663             // If we found a map entry, signal that the pointer has to be
8664             // returned and move on to the next declaration. Exclude cases where
8665             // the base pointer is mapped as array subscript, array section or
8666             // array shaping. The base address is passed as a pointer to base in
8667             // this case and cannot be used as a base for use_device_ptr list
8668             // item.
8669             if (CI != Data.end()) {
8670               auto PrevCI = std::next(CI->Components.rbegin());
8671               const auto *VarD = dyn_cast<VarDecl>(VD);
8672               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8673                   isa<MemberExpr>(IE) ||
8674                   !VD->getType().getNonReferenceType()->isPointerType() ||
8675                   PrevCI == CI->Components.rend() ||
8676                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8677                   VarD->hasLocalStorage()) {
8678                 CI->ReturnDevicePointer = true;
8679                 Found = true;
8680                 break;
8681               }
8682             }
8683           }
8684           if (Found)
8685             continue;
8686         }
8687 
8688         // We didn't find any match in our map information - generate a zero
8689         // size array section - if the pointer is a struct member we defer this
8690         // action until the whole struct has been processed.
8691         if (isa<MemberExpr>(IE)) {
8692           // Insert the pointer into Info to be processed by
8693           // generateInfoForComponentList. Because it is a member pointer
8694           // without a pointee, no entry will be generated for it, therefore
8695           // we need to generate one after the whole struct has been processed.
8696           // Nonetheless, generateInfoForComponentList must be called to take
8697           // the pointer into account for the calculation of the range of the
8698           // partial struct.
8699           InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8700                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8701                   nullptr);
8702           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8703         } else {
8704           llvm::Value *Ptr =
8705               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8706           UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8707           UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8708           UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8709           UseDevicePtrCombinedInfo.Sizes.push_back(
8710               llvm::Constant::getNullValue(CGF.Int64Ty));
8711           UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8712           UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8713         }
8714       }
8715     }
8716 
8717     // Look at the use_device_addr clause information and mark the existing map
8718     // entries as such. If there is no map information for an entry in the
8719     // use_device_addr list, we create one with map type 'alloc' and zero size
8720     // section. It is the user fault if that was not mapped before. If there is
8721     // no map information and the pointer is a struct member, then we defer the
8722     // emission of that entry until the whole struct has been processed.
8723     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8724     for (const auto *Cl : Clauses) {
8725       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8726       if (!C)
8727         continue;
8728       for (const auto L : C->component_lists()) {
8729         assert(!std::get<1>(L).empty() &&
8730                "Not expecting empty list of components!");
8731         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8732         if (!Processed.insert(VD).second)
8733           continue;
8734         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8735         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8736         // If the first component is a member expression, we have to look into
8737         // 'this', which maps to null in the map of map information. Otherwise
8738         // look directly for the information.
8739         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8740 
8741         // We potentially have map information for this declaration already.
8742         // Look for the first set of components that refer to it.
8743         if (It != Info.end()) {
8744           bool Found = false;
8745           for (auto &Data : It->second) {
8746             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8747               return MI.Components.back().getAssociatedDeclaration() == VD;
8748             });
8749             // If we found a map entry, signal that the pointer has to be
8750             // returned and move on to the next declaration.
8751             if (CI != Data.end()) {
8752               CI->ReturnDevicePointer = true;
8753               Found = true;
8754               break;
8755             }
8756           }
8757           if (Found)
8758             continue;
8759         }
8760 
8761         // We didn't find any match in our map information - generate a zero
8762         // size array section - if the pointer is a struct member we defer this
8763         // action until the whole struct has been processed.
8764         if (isa<MemberExpr>(IE)) {
8765           // Insert the pointer into Info to be processed by
8766           // generateInfoForComponentList. Because it is a member pointer
8767           // without a pointee, no entry will be generated for it, therefore
8768           // we need to generate one after the whole struct has been processed.
8769           // Nonetheless, generateInfoForComponentList must be called to take
8770           // the pointer into account for the calculation of the range of the
8771           // partial struct.
8772           InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8773                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8774                   nullptr, nullptr, /*ForDeviceAddr=*/true);
8775           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8776         } else {
8777           llvm::Value *Ptr;
8778           if (IE->isGLValue())
8779             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8780           else
8781             Ptr = CGF.EmitScalarExpr(IE);
8782           CombinedInfo.Exprs.push_back(VD);
8783           CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8784           CombinedInfo.Pointers.push_back(Ptr);
8785           CombinedInfo.Sizes.push_back(
8786               llvm::Constant::getNullValue(CGF.Int64Ty));
8787           CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8788           CombinedInfo.Mappers.push_back(nullptr);
8789         }
8790       }
8791     }
8792 
8793     for (const auto &Data : Info) {
8794       StructRangeInfoTy PartialStruct;
8795       // Temporary generated information.
8796       MapCombinedInfoTy CurInfo;
8797       const Decl *D = Data.first;
8798       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8799       for (const auto &M : Data.second) {
8800         for (const MapInfo &L : M) {
8801           assert(!L.Components.empty() &&
8802                  "Not expecting declaration with no component lists.");
8803 
8804           // Remember the current base pointer index.
8805           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8806           CurInfo.NonContigInfo.IsNonContiguous =
8807               L.Components.back().isNonContiguous();
8808           generateInfoForComponentList(
8809               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8810               CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8811               L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8812 
8813           // If this entry relates with a device pointer, set the relevant
8814           // declaration and add the 'return pointer' flag.
8815           if (L.ReturnDevicePointer) {
8816             assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8817                    "Unexpected number of mapped base pointers.");
8818 
8819             const ValueDecl *RelevantVD =
8820                 L.Components.back().getAssociatedDeclaration();
8821             assert(RelevantVD &&
8822                    "No relevant declaration related with device pointer??");
8823 
8824             CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8825                 RelevantVD);
8826             CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8827           }
8828         }
8829       }
8830 
8831       // Append any pending zero-length pointers which are struct members and
8832       // used with use_device_ptr or use_device_addr.
8833       auto CI = DeferredInfo.find(Data.first);
8834       if (CI != DeferredInfo.end()) {
8835         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8836           llvm::Value *BasePtr;
8837           llvm::Value *Ptr;
8838           if (L.ForDeviceAddr) {
8839             if (L.IE->isGLValue())
8840               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8841             else
8842               Ptr = this->CGF.EmitScalarExpr(L.IE);
8843             BasePtr = Ptr;
8844             // Entry is RETURN_PARAM. Also, set the placeholder value
8845             // MEMBER_OF=FFFF so that the entry is later updated with the
8846             // correct value of MEMBER_OF.
8847             CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8848           } else {
8849             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8850             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8851                                              L.IE->getExprLoc());
8852             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8853             // placeholder value MEMBER_OF=FFFF so that the entry is later
8854             // updated with the correct value of MEMBER_OF.
8855             CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8856                                     OMP_MAP_MEMBER_OF);
8857           }
8858           CurInfo.Exprs.push_back(L.VD);
8859           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8860           CurInfo.Pointers.push_back(Ptr);
8861           CurInfo.Sizes.push_back(
8862               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8863           CurInfo.Mappers.push_back(nullptr);
8864         }
8865       }
8866       // If there is an entry in PartialStruct it means we have a struct with
8867       // individual members mapped. Emit an extra combined entry.
8868       if (PartialStruct.Base.isValid()) {
8869         CurInfo.NonContigInfo.Dims.push_back(0);
8870         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8871       }
8872 
8873       // We need to append the results of this capture to what we already
8874       // have.
8875       CombinedInfo.append(CurInfo);
8876     }
8877     // Append data for use_device_ptr clauses.
8878     CombinedInfo.append(UseDevicePtrCombinedInfo);
8879   }
8880 
8881 public:
8882   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8883       : CurDir(&Dir), CGF(CGF) {
8884     // Extract firstprivate clause information.
8885     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8886       for (const auto *D : C->varlists())
8887         FirstPrivateDecls.try_emplace(
8888             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8889     // Extract implicit firstprivates from uses_allocators clauses.
8890     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8891       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8892         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8893         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8894           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8895                                         /*Implicit=*/true);
8896         else if (const auto *VD = dyn_cast<VarDecl>(
8897                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8898                          ->getDecl()))
8899           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8900       }
8901     }
8902     // Extract device pointer clause information.
8903     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8904       for (auto L : C->component_lists())
8905         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8906   }
8907 
  /// Constructor for the declare mapper directive. Only the directive is
  /// recorded here; its clauses are processed later via
  /// generateAllInfoForMapper().
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8911 
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  ///
  /// \param CombinedInfo Destination arrays; one combined entry covering the
  ///        whole struct range is appended to them.
  /// \param CurTypes Map flags of the member entries already generated for
  ///        this struct; updated in place (MEMBER_OF, OMPX_HOLD, TARGET_PARAM
  ///        adjustments).
  /// \param PartialStruct Base address and lowest/highest mapped element
  ///        collected while the individual members were processed.
  /// \param VD The declaration the struct is associated with, if any.
  /// \param NotTargetParams If false, the combined entry is marked as the
  ///        TARGET_PARAM (used when generating info for captures).
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    // A single non-MEMBER_OF entry that is not an array section does not need
    // an extra combined parent entry.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    // If the whole record was mapped, use its start for both bounds; the size
    // computed below (HB+1 - LB) then covers exactly one complete record.
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr =
        CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // The combined entry is TARGET_PARAM only when generating info for
    // captures (NotTargetParams == false); otherwise it carries no flags yet.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element: the combined entry
    // (appended above) now represents the argument instead.
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement.  Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_OMPX_HOLD;
        })) {
      CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }
8983 
8984   /// Generate all the base pointers, section pointers, sizes, map types, and
8985   /// mappers for the extracted mappable expressions (all included in \a
8986   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8987   /// pair of the relevant declaration and index where it occurs is appended to
8988   /// the device pointers info array.
8989   void generateAllInfo(
8990       MapCombinedInfoTy &CombinedInfo,
8991       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8992           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8993     assert(CurDir.is<const OMPExecutableDirective *>() &&
8994            "Expect a executable directive");
8995     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8996     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
8997   }
8998 
8999   /// Generate all the base pointers, section pointers, sizes, map types, and
9000   /// mappers for the extracted map clauses of user-defined mapper (all included
9001   /// in \a CombinedInfo).
9002   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
9003     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
9004            "Expect a declare mapper directive");
9005     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
9006     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
9007   }
9008 
  /// Emit capture info for lambdas for variables captured by reference.
  ///
  /// \param VD Declaration of the captured variable (expected to be of lambda
  ///        record type; the function is a no-op otherwise).
  /// \param Arg Pointer value the lambda object was passed as.
  /// \param CombinedInfo Destination arrays; one PTR_AND_OBJ entry per
  ///        by-reference (or pointer) capture is appended.
  /// \param LambdaPointers Filled with field-address -> lambda-address pairs
  ///        so adjustMemberOfForLambdaCaptures can later fix the MEMBER_OF
  ///        indices.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    // Only lambda records are handled here.
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    // A captured 'this' is emitted as a PTR_AND_OBJ member entry with the
    // MEMBER_OF placeholder; the real index is patched in later by
    // adjustMemberOfForLambdaCaptures.
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      // NOTE: this VD intentionally shadows the parameter; from here on it is
      // the captured variable, not the lambda object.
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and captured pointers are mapped.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: map the referenced object with its full size.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Captured pointer: map the pointer value itself with zero size.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }
9075 
9076   /// Set correct indices for lambdas captures.
9077   void adjustMemberOfForLambdaCaptures(
9078       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9079       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9080       MapFlagsArrayTy &Types) const {
9081     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9082       // Set correct member_of idx for all implicit lambda captures.
9083       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9084                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9085         continue;
9086       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9087       assert(BasePtr && "Unable to find base lambda address.");
9088       int TgtIdx = -1;
9089       for (unsigned J = I; J > 0; --J) {
9090         unsigned Idx = J - 1;
9091         if (Pointers[Idx] != BasePtr)
9092           continue;
9093         TgtIdx = Idx;
9094         break;
9095       }
9096       assert(TgtIdx != -1 && "Unable to find parent lambda.");
9097       // All other current entries will be MEMBER_OF the combined entry
9098       // (except for PTR_AND_OBJ entries which do not have a placeholder value
9099       // 0xFFFF in the MEMBER_OF field).
9100       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9101       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9102     }
9103   }
9104 
9105   /// Generate the base pointers, section pointers, sizes, map types, and
9106   /// mappers associated to a given capture (all included in \a CombinedInfo).
9107   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
9108                               llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9109                               StructRangeInfoTy &PartialStruct) const {
9110     assert(!Cap->capturesVariableArrayType() &&
9111            "Not expecting to generate map info for a variable array type!");
9112 
9113     // We need to know when we generating information for the first component
9114     const ValueDecl *VD = Cap->capturesThis()
9115                               ? nullptr
9116                               : Cap->getCapturedVar()->getCanonicalDecl();
9117 
9118     // If this declaration appears in a is_device_ptr clause we just have to
9119     // pass the pointer by value. If it is a reference to a declaration, we just
9120     // pass its value.
9121     if (DevPointersMap.count(VD)) {
9122       CombinedInfo.Exprs.push_back(VD);
9123       CombinedInfo.BasePointers.emplace_back(Arg, VD);
9124       CombinedInfo.Pointers.push_back(Arg);
9125       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9126           CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9127           /*isSigned=*/true));
9128       CombinedInfo.Types.push_back(
9129           (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
9130           OMP_MAP_TARGET_PARAM);
9131       CombinedInfo.Mappers.push_back(nullptr);
9132       return;
9133     }
9134 
9135     using MapData =
9136         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
9137                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
9138                    const ValueDecl *, const Expr *>;
9139     SmallVector<MapData, 4> DeclComponentLists;
9140     assert(CurDir.is<const OMPExecutableDirective *>() &&
9141            "Expect a executable directive");
9142     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9143     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9144       const auto *EI = C->getVarRefs().begin();
9145       for (const auto L : C->decl_component_lists(VD)) {
9146         const ValueDecl *VDecl, *Mapper;
9147         // The Expression is not correct if the mapping is implicit
9148         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9149         OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9150         std::tie(VDecl, Components, Mapper) = L;
9151         assert(VDecl == VD && "We got information for the wrong declaration??");
9152         assert(!Components.empty() &&
9153                "Not expecting declaration with no component lists.");
9154         DeclComponentLists.emplace_back(Components, C->getMapType(),
9155                                         C->getMapTypeModifiers(),
9156                                         C->isImplicit(), Mapper, E);
9157         ++EI;
9158       }
9159     }
9160     llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9161                                              const MapData &RHS) {
9162       ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9163       OpenMPMapClauseKind MapType = std::get<1>(RHS);
9164       bool HasPresent = !MapModifiers.empty() &&
9165                         llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
9166                           return K == clang::OMPC_MAP_MODIFIER_present;
9167                         });
9168       bool HasAllocs = MapType == OMPC_MAP_alloc;
9169       MapModifiers = std::get<2>(RHS);
9170       MapType = std::get<1>(LHS);
9171       bool HasPresentR =
9172           !MapModifiers.empty() &&
9173           llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
9174             return K == clang::OMPC_MAP_MODIFIER_present;
9175           });
9176       bool HasAllocsR = MapType == OMPC_MAP_alloc;
9177       return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9178     });
9179 
9180     // Find overlapping elements (including the offset from the base element).
9181     llvm::SmallDenseMap<
9182         const MapData *,
9183         llvm::SmallVector<
9184             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9185         4>
9186         OverlappedData;
9187     size_t Count = 0;
9188     for (const MapData &L : DeclComponentLists) {
9189       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9190       OpenMPMapClauseKind MapType;
9191       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9192       bool IsImplicit;
9193       const ValueDecl *Mapper;
9194       const Expr *VarRef;
9195       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9196           L;
9197       ++Count;
9198       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
9199         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9200         std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9201                  VarRef) = L1;
9202         auto CI = Components.rbegin();
9203         auto CE = Components.rend();
9204         auto SI = Components1.rbegin();
9205         auto SE = Components1.rend();
9206         for (; CI != CE && SI != SE; ++CI, ++SI) {
9207           if (CI->getAssociatedExpression()->getStmtClass() !=
9208               SI->getAssociatedExpression()->getStmtClass())
9209             break;
9210           // Are we dealing with different variables/fields?
9211           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9212             break;
9213         }
9214         // Found overlapping if, at least for one component, reached the head
9215         // of the components list.
9216         if (CI == CE || SI == SE) {
9217           // Ignore it if it is the same component.
9218           if (CI == CE && SI == SE)
9219             continue;
9220           const auto It = (SI == SE) ? CI : SI;
9221           // If one component is a pointer and another one is a kind of
9222           // dereference of this pointer (array subscript, section, dereference,
9223           // etc.), it is not an overlapping.
9224           // Same, if one component is a base and another component is a
9225           // dereferenced pointer memberexpr with the same base.
9226           if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9227               (std::prev(It)->getAssociatedDeclaration() &&
9228                std::prev(It)
9229                    ->getAssociatedDeclaration()
9230                    ->getType()
9231                    ->isPointerType()) ||
9232               (It->getAssociatedDeclaration() &&
9233                It->getAssociatedDeclaration()->getType()->isPointerType() &&
9234                std::next(It) != CE && std::next(It) != SE))
9235             continue;
9236           const MapData &BaseData = CI == CE ? L : L1;
9237           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9238               SI == SE ? Components : Components1;
9239           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
9240           OverlappedElements.getSecond().push_back(SubData);
9241         }
9242       }
9243     }
9244     // Sort the overlapped elements for each item.
9245     llvm::SmallVector<const FieldDecl *, 4> Layout;
9246     if (!OverlappedData.empty()) {
9247       const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9248       const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9249       while (BaseType != OrigType) {
9250         BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9251         OrigType = BaseType->getPointeeOrArrayElementType();
9252       }
9253 
9254       if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9255         getPlainLayout(CRD, Layout, /*AsBase=*/false);
9256       else {
9257         const auto *RD = BaseType->getAsRecordDecl();
9258         Layout.append(RD->field_begin(), RD->field_end());
9259       }
9260     }
9261     for (auto &Pair : OverlappedData) {
9262       llvm::stable_sort(
9263           Pair.getSecond(),
9264           [&Layout](
9265               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9266               OMPClauseMappableExprCommon::MappableExprComponentListRef
9267                   Second) {
9268             auto CI = First.rbegin();
9269             auto CE = First.rend();
9270             auto SI = Second.rbegin();
9271             auto SE = Second.rend();
9272             for (; CI != CE && SI != SE; ++CI, ++SI) {
9273               if (CI->getAssociatedExpression()->getStmtClass() !=
9274                   SI->getAssociatedExpression()->getStmtClass())
9275                 break;
9276               // Are we dealing with different variables/fields?
9277               if (CI->getAssociatedDeclaration() !=
9278                   SI->getAssociatedDeclaration())
9279                 break;
9280             }
9281 
9282             // Lists contain the same elements.
9283             if (CI == CE && SI == SE)
9284               return false;
9285 
9286             // List with less elements is less than list with more elements.
9287             if (CI == CE || SI == SE)
9288               return CI == CE;
9289 
9290             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9291             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9292             if (FD1->getParent() == FD2->getParent())
9293               return FD1->getFieldIndex() < FD2->getFieldIndex();
9294             const auto *It =
9295                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9296                   return FD == FD1 || FD == FD2;
9297                 });
9298             return *It == FD1;
9299           });
9300     }
9301 
9302     // Associated with a capture, because the mapping flags depend on it.
9303     // Go through all of the elements with the overlapped elements.
9304     bool IsFirstComponentList = true;
9305     for (const auto &Pair : OverlappedData) {
9306       const MapData &L = *Pair.getFirst();
9307       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9308       OpenMPMapClauseKind MapType;
9309       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9310       bool IsImplicit;
9311       const ValueDecl *Mapper;
9312       const Expr *VarRef;
9313       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9314           L;
9315       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9316           OverlappedComponents = Pair.getSecond();
9317       generateInfoForComponentList(
9318           MapType, MapModifiers, llvm::None, Components, CombinedInfo,
9319           PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
9320           /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
9321       IsFirstComponentList = false;
9322     }
9323     // Go through other elements without overlapped elements.
9324     for (const MapData &L : DeclComponentLists) {
9325       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9326       OpenMPMapClauseKind MapType;
9327       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9328       bool IsImplicit;
9329       const ValueDecl *Mapper;
9330       const Expr *VarRef;
9331       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9332           L;
9333       auto It = OverlappedData.find(&L);
9334       if (It == OverlappedData.end())
9335         generateInfoForComponentList(MapType, MapModifiers, llvm::None,
9336                                      Components, CombinedInfo, PartialStruct,
9337                                      IsFirstComponentList, IsImplicit, Mapper,
9338                                      /*ForDeviceAddr=*/false, VD, VarRef);
9339       IsFirstComponentList = false;
9340     }
9341   }
9342 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Appends exactly one entry (expr, base pointer, pointer, size, map type,
  /// mapper) to \a CombinedInfo. The entry is always flagged as a target
  /// parameter, and as implicit unless FirstPrivateDecls records the capture
  /// as coming from an explicit firstprivate clause.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this' capture: map the pointed-to object, sized from the record
      // field's pointee type, with the default tofrom semantics.
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // If the variable was recorded as firstprivate, inherit its recorded
      // implicitness instead of the default.
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      // By-reference capture: the field is a reference type; size comes from
      // the referenced element type.
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        // Firstprivate pointer captured by reference: load through the
        // reference so the runtime receives the pointer value itself.
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
9415 };
9416 } // anonymous namespace
9417 
/// Emit descriptors for non-contiguous map entries: for every entry in
/// \a CombinedInfo whose dimension count is greater than 1, build a stack
/// array of 'descriptor_dim' structs (offset/count/stride per dimension)
/// and store its address into the matching slot of Info.PointersArray.
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //  uint64_t offset;
  //  uint64_t count;
  //  uint64_t stride
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variable here since the size of "Dims" is the same as the
  // size of Components, however, the size of offset, count, and stride is equal
  // to the size of base declaration that is non-contiguous.
  // I indexes Dims (one per component list); L indexes the per-declaration
  // Offsets/Counts/Strides vectors and only advances for multi-dim entries.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting ir if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    // Local temporary: descriptor_dim dims[Dims[I]].
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      // Write dims[II] from the RevIdx-th recorded dimension, i.e. the
      // recorded per-dimension data is consumed in reverse order.
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr(P, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    ++L;
  }
}
9485 
9486 // Try to extract the base declaration from a `this->x` expression if possible.
9487 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9488   if (!E)
9489     return nullptr;
9490 
9491   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
9492     if (const MemberExpr *ME =
9493             dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9494       return ME->getMemberDecl();
9495   return nullptr;
9496 }
9497 
9498 /// Emit a string constant containing the names of the values mapped to the
9499 /// offloading runtime library.
9500 llvm::Constant *
9501 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9502                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9503 
9504   if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9505     return OMPBuilder.getOrCreateDefaultSrcLocStr();
9506 
9507   SourceLocation Loc;
9508   if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9509     if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9510       Loc = VD->getLocation();
9511     else
9512       Loc = MapExprs.getMapExpr()->getExprLoc();
9513   } else {
9514     Loc = MapExprs.getMapDecl()->getLocation();
9515   }
9516 
9517   std::string ExprName = "";
9518   if (MapExprs.getMapExpr()) {
9519     PrintingPolicy P(CGF.getContext().getLangOpts());
9520     llvm::raw_string_ostream OS(ExprName);
9521     MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9522     OS.flush();
9523   } else {
9524     ExprName = MapExprs.getMapDecl()->getNameAsString();
9525   }
9526 
9527   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9528   return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName.c_str(),
9529                                          PLoc.getLine(), PLoc.getColumn());
9530 }
9531 
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// Populates Info with the base-pointer, pointer, size, map-type, map-name
/// and mapper arrays built from \a CombinedInfo, then (for non-contiguous
/// maps) emits the dimension descriptors.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : CombinedInfo.Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    // Stack temporaries for the per-entry base-pointer, pointer and mapper
    // arrays (void *[NumberOfPtrs]).
    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      // At least one size is only known at runtime: use a stack array that
      // is filled in the per-entry loop below.
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
        if (IsNonContiguous &&
            (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
          // For a non-contiguous entry the "size" slot carries the dimension
          // count instead of a byte size.
          ConstSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
        } else {
          ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
        }
      }

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Fill the per-entry arrays element by element.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      // Store the base pointer into .offload_baseptrs[I].
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record where use_device_ptr declarations can find their address.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      // Store the begin pointer into .offload_ptrs[I].
      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Runtime-evaluated sizes go into the stack array created above.
      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  // Dimension descriptors are only needed for non-contiguous maps.
  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}
9706 
namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  /// When true, emit the map-type array meant for the end of the region
  /// (Info.MapTypesArrayEnd, if present) instead of the beginning one.
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
};
} // namespace
9715 
/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers.  If
/// ForEndCall, emit map types to be passed for the end of the region instead of
/// the beginning.
///
/// When Info.NumberOfPtrs is zero every argument is emitted as a null
/// pointer of the appropriate type; otherwise each argument is a GEP to
/// element 0 of the corresponding array built by emitOffloadingArrays.
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
    llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
    const ArgumentsOptions &Options = ArgumentsOptions()) {
  assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
         "expected region end call to runtime only when end call is separate");
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    // End calls use the PRESENT-stripped map-type array when one was built.
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
                                                    : Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);

    // Only emit the mapper information arrays if debug information is
    // requested.
    if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
      MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.MapNamesArray,
          /*Idx0=*/0,
          /*Idx1=*/0);
    // If there is no user-defined mapper, set the mapper array to nullptr to
    // avoid an unnecessary data privatization
    if (!Info.HasMapper)
      MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MappersArrayArg =
          CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
  } else {
    // No entries at all: pass typed null pointers for every array.
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
  }
}
9776 
/// Check for inner distribute directive.
///
/// For a 'target' or 'target teams' directive \a D, return the nested
/// 'distribute' directive if its body (possibly looking through one level
/// of 'teams') is a single distribute construct; otherwise return nullptr.
/// Combined target-distribute forms and all non-target directives yield
/// nullptr or are unreachable here.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  // The body must reduce to a single (possibly compound-wrapped) child.
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // 'target' may contain 'distribute' directly, or 'teams' wrapping a
      // single 'distribute' — look through one 'teams' level.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      // These combined target forms cannot contain a nested distribute.
      return nullptr;
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      // Callers only pass target-entry directives; anything else is a bug.
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
9886 
9887 /// Emit the user-defined mapper function. The code generation follows the
9888 /// pattern in the example below.
9889 /// \code
9890 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9891 ///                                           void *base, void *begin,
9892 ///                                           int64_t size, int64_t type,
9893 ///                                           void *name = nullptr) {
9894 ///   // Allocate space for an array section first or add a base/begin for
9895 ///   // pointer dereference.
9896 ///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9897 ///       !maptype.IsDelete)
9898 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9899 ///                                 size*sizeof(Ty), clearToFromMember(type));
9900 ///   // Map members.
9901 ///   for (unsigned i = 0; i < size; i++) {
9902 ///     // For each component specified by this mapper:
9903 ///     for (auto c : begin[i]->all_components) {
9904 ///       if (c.hasMapper())
9905 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9906 ///                       c.arg_type, c.arg_name);
9907 ///       else
9908 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9909 ///                                     c.arg_begin, c.arg_size, c.arg_type,
9910 ///                                     c.arg_name);
9911 ///     }
9912 ///   }
9913 ///   // Delete the array section.
9914 ///   if (size > 1 && maptype.IsDelete)
9915 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9916 ///                                 size*sizeof(Ty), clearToFromMember(type));
9917 /// }
9918 /// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Each mapper function is emitted at most once; UDMMap serves as the cache.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  // The mapped type is accessed through a restrict-qualified pointer inside
  // the generated loop body.
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  // The variable declared in the 'declare mapper' directive; it is privatized
  // below so each loop iteration maps the current array element.
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes. The signature matches
  // the runtime's expected mapper callback:
  //   void (void *handle, void *base, void *begin, int64 size, int64 type,
  //         void *name)
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The function name encodes the mangled mapped type so mappers for distinct
  // types (or distinct named mappers) get distinct symbols.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Allow the mapper to be optimized/inlined even at -O0-style defaults.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initialization and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(
      PtrBegin->getType()->getPointerElementType(), PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initialization if this is an array section and \p MapType
  // indicates that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block. The loop induction variable is a pointer PHI
  // that starts at PtrBegin; its back-edge incoming value is added after the
  // body is emitted (see PtrNext below).
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the pre-existing component count into the MEMBER_OF bit-field
  // position of the map-type flags, so member entries emitted below are
  // attributed to the correct enclosing component.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // Mapping names are only emitted when debug info is requested.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Merge the four map-type results. The ToElseBB incoming edge carries the
    // unmodified MemberMapType, i.e. the tofrom case.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  // Close the loop: the back edge comes from the last block of the body.
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the emitted function; also record it against the current function
  // being generated (if any) so it can be deferred/cleaned up appropriately.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
10167 
10168 /// Emit the array initialization or deletion portion for user-defined mapper
10169 /// code generation. First, it evaluates whether an array section is mapped and
10170 /// whether the \a MapType instructs to delete this section. If \a IsInit is
10171 /// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  // Prefix distinguishes the init and delete variants in block/value names.
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  // An array section is present when the element count exceeds one.
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
        MapperCGF.Builder.CreatePtrDiff(Base, Begin));
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    // Initialization runs for array sections, or for PTR_AND_OBJ entries
    // whose base differs from begin (attached pointer case).
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    // ...but only when the map type does NOT request deletion.
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    // Deletion runs only when the map type requests it.
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  // Mark the whole-section entry as implicit, since the user did not spell it.
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
10236 
10237 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10238     const OMPDeclareMapperDecl *D) {
10239   auto I = UDMMap.find(D);
10240   if (I != UDMMap.end())
10241     return I->second;
10242   emitUserDefinedMapper(D);
10243   return UDMMap.lookup(D);
10244 }
10245 
10246 void CGOpenMPRuntime::emitTargetNumIterationsCall(
10247     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10248     llvm::Value *DeviceID,
10249     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10250                                      const OMPLoopDirective &D)>
10251         SizeEmitter) {
10252   OpenMPDirectiveKind Kind = D.getDirectiveKind();
10253   const OMPExecutableDirective *TD = &D;
10254   // Get nested teams distribute kind directive, if any.
10255   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
10256     TD = getNestedDistributeDirective(CGM.getContext(), D);
10257   if (!TD)
10258     return;
10259   const auto *LD = cast<OMPLoopDirective>(TD);
10260   auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
10261                                                          PrePostActionTy &) {
10262     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
10263       llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10264       llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
10265       CGF.EmitRuntimeCall(
10266           OMPBuilder.getOrCreateRuntimeFunction(
10267               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
10268           Args);
10269     }
10270   };
10271   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
10272 }
10273 
10274 void CGOpenMPRuntime::emitTargetCall(
10275     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10276     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
10277     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10278     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10279                                      const OMPLoopDirective &D)>
10280         SizeEmitter) {
10281   if (!CGF.HaveInsertPoint())
10282     return;
10283 
10284   assert(OutlinedFn && "Invalid outlined function!");
10285 
10286   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10287                                  D.hasClausesOfKind<OMPNowaitClause>();
10288   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
10289   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
10290   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
10291                                             PrePostActionTy &) {
10292     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10293   };
10294   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
10295 
10296   CodeGenFunction::OMPTargetDataInfo InputInfo;
10297   llvm::Value *MapTypesArray = nullptr;
10298   llvm::Value *MapNamesArray = nullptr;
10299   // Fill up the pointer arrays and transfer execution to the device.
10300   auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
10301                     &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
10302                     &CapturedVars,
10303                     SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
10304     if (Device.getInt() == OMPC_DEVICE_ancestor) {
10305       // Reverse offloading is not supported, so just execute on the host.
10306       if (RequiresOuterTask) {
10307         CapturedVars.clear();
10308         CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10309       }
10310       emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10311       return;
10312     }
10313 
10314     // On top of the arrays that were filled up, the target offloading call
10315     // takes as arguments the device id as well as the host pointer. The host
10316     // pointer is used by the runtime library to identify the current target
10317     // region, so it only has to be unique and not necessarily point to
10318     // anything. It could be the pointer to the outlined function that
10319     // implements the target region, but we aren't using that so that the
10320     // compiler doesn't need to keep that, and could therefore inline the host
10321     // function if proven worthwhile during optimization.
10322 
10323     // From this point on, we need to have an ID of the target region defined.
10324     assert(OutlinedFnID && "Invalid outlined function ID!");
10325 
10326     // Emit device ID if any.
10327     llvm::Value *DeviceID;
10328     if (Device.getPointer()) {
10329       assert((Device.getInt() == OMPC_DEVICE_unknown ||
10330               Device.getInt() == OMPC_DEVICE_device_num) &&
10331              "Expected device_num modifier.");
10332       llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10333       DeviceID =
10334           CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10335     } else {
10336       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10337     }
10338 
10339     // Emit the number of elements in the offloading arrays.
10340     llvm::Value *PointerNum =
10341         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10342 
10343     // Return value of the runtime offloading call.
10344     llvm::Value *Return;
10345 
10346     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
10347     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
10348 
10349     // Source location for the ident struct
10350     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10351 
10352     // Emit tripcount for the target loop-based directive.
10353     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
10354 
10355     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10356     // The target region is an outlined function launched by the runtime
10357     // via calls __tgt_target() or __tgt_target_teams().
10358     //
10359     // __tgt_target() launches a target region with one team and one thread,
10360     // executing a serial region.  This master thread may in turn launch
10361     // more threads within its team upon encountering a parallel region,
10362     // however, no additional teams can be launched on the device.
10363     //
10364     // __tgt_target_teams() launches a target region with one or more teams,
10365     // each with one or more threads.  This call is required for target
10366     // constructs such as:
10367     //  'target teams'
10368     //  'target' / 'teams'
10369     //  'target teams distribute parallel for'
10370     //  'target parallel'
10371     // and so on.
10372     //
10373     // Note that on the host and CPU targets, the runtime implementation of
10374     // these calls simply call the outlined function without forking threads.
10375     // The outlined functions themselves have runtime calls to
10376     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
10377     // the compiler in emitTeamsCall() and emitParallelCall().
10378     //
10379     // In contrast, on the NVPTX target, the implementation of
10380     // __tgt_target_teams() launches a GPU kernel with the requested number
10381     // of teams and threads so no additional calls to the runtime are required.
10382     if (NumTeams) {
10383       // If we have NumTeams defined this means that we have an enclosed teams
10384       // region. Therefore we also expect to have NumThreads defined. These two
10385       // values should be defined in the presence of a teams directive,
10386       // regardless of having any clauses associated. If the user is using teams
10387       // but no clauses, these two values will be the default that should be
10388       // passed to the runtime library - a 32-bit integer with the value zero.
10389       assert(NumThreads && "Thread limit expression should be available along "
10390                            "with number of teams.");
10391       SmallVector<llvm::Value *> OffloadingArgs = {
10392           RTLoc,
10393           DeviceID,
10394           OutlinedFnID,
10395           PointerNum,
10396           InputInfo.BasePointersArray.getPointer(),
10397           InputInfo.PointersArray.getPointer(),
10398           InputInfo.SizesArray.getPointer(),
10399           MapTypesArray,
10400           MapNamesArray,
10401           InputInfo.MappersArray.getPointer(),
10402           NumTeams,
10403           NumThreads};
10404       if (HasNowait) {
10405         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10406         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10407         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10408         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10409         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10410         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10411       }
10412       Return = CGF.EmitRuntimeCall(
10413           OMPBuilder.getOrCreateRuntimeFunction(
10414               CGM.getModule(), HasNowait
10415                                    ? OMPRTL___tgt_target_teams_nowait_mapper
10416                                    : OMPRTL___tgt_target_teams_mapper),
10417           OffloadingArgs);
10418     } else {
10419       SmallVector<llvm::Value *> OffloadingArgs = {
10420           RTLoc,
10421           DeviceID,
10422           OutlinedFnID,
10423           PointerNum,
10424           InputInfo.BasePointersArray.getPointer(),
10425           InputInfo.PointersArray.getPointer(),
10426           InputInfo.SizesArray.getPointer(),
10427           MapTypesArray,
10428           MapNamesArray,
10429           InputInfo.MappersArray.getPointer()};
10430       if (HasNowait) {
10431         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10432         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10433         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10434         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10435         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10436         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10437       }
10438       Return = CGF.EmitRuntimeCall(
10439           OMPBuilder.getOrCreateRuntimeFunction(
10440               CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
10441                                          : OMPRTL___tgt_target_mapper),
10442           OffloadingArgs);
10443     }
10444 
10445     // Check the error code and execute the host version if required.
10446     llvm::BasicBlock *OffloadFailedBlock =
10447         CGF.createBasicBlock("omp_offload.failed");
10448     llvm::BasicBlock *OffloadContBlock =
10449         CGF.createBasicBlock("omp_offload.cont");
10450     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
10451     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
10452 
10453     CGF.EmitBlock(OffloadFailedBlock);
10454     if (RequiresOuterTask) {
10455       CapturedVars.clear();
10456       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10457     }
10458     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10459     CGF.EmitBranch(OffloadContBlock);
10460 
10461     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
10462   };
10463 
10464   // Notify that the host version must be executed.
10465   auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
10466                     RequiresOuterTask](CodeGenFunction &CGF,
10467                                        PrePostActionTy &) {
10468     if (RequiresOuterTask) {
10469       CapturedVars.clear();
10470       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10471     }
10472     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10473   };
10474 
10475   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10476                           &MapNamesArray, &CapturedVars, RequiresOuterTask,
10477                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
10478     // Fill up the arrays with all the captured variables.
10479     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10480 
10481     // Get mappable expression information.
10482     MappableExprsHandler MEHandler(D, CGF);
10483     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10484     llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10485 
10486     auto RI = CS.getCapturedRecordDecl()->field_begin();
10487     auto *CV = CapturedVars.begin();
10488     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10489                                               CE = CS.capture_end();
10490          CI != CE; ++CI, ++RI, ++CV) {
10491       MappableExprsHandler::MapCombinedInfoTy CurInfo;
10492       MappableExprsHandler::StructRangeInfoTy PartialStruct;
10493 
10494       // VLA sizes are passed to the outlined region by copy and do not have map
10495       // information associated.
10496       if (CI->capturesVariableArrayType()) {
10497         CurInfo.Exprs.push_back(nullptr);
10498         CurInfo.BasePointers.push_back(*CV);
10499         CurInfo.Pointers.push_back(*CV);
10500         CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10501             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10502         // Copy to the device as an argument. No need to retrieve it.
10503         CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
10504                                 MappableExprsHandler::OMP_MAP_TARGET_PARAM |
10505                                 MappableExprsHandler::OMP_MAP_IMPLICIT);
10506         CurInfo.Mappers.push_back(nullptr);
10507       } else {
10508         // If we have any information in the map clause, we use it, otherwise we
10509         // just do a default mapping.
10510         MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
10511         if (!CI->capturesThis())
10512           MappedVarSet.insert(CI->getCapturedVar());
10513         else
10514           MappedVarSet.insert(nullptr);
10515         if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
10516           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10517         // Generate correct mapping for variables captured by reference in
10518         // lambdas.
10519         if (CI->capturesVariable())
10520           MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10521                                                   CurInfo, LambdaPointers);
10522       }
10523       // We expect to have at least an element of information for this capture.
10524       assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
10525              "Non-existing map pointer for capture!");
10526       assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10527              CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10528              CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10529              CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10530              "Inconsistent map information sizes!");
10531 
10532       // If there is an entry in PartialStruct it means we have a struct with
10533       // individual members mapped. Emit an extra combined entry.
10534       if (PartialStruct.Base.isValid()) {
10535         CombinedInfo.append(PartialStruct.PreliminaryMapData);
10536         MEHandler.emitCombinedEntry(
10537             CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
10538             !PartialStruct.PreliminaryMapData.BasePointers.empty());
10539       }
10540 
10541       // We need to append the results of this capture to what we already have.
10542       CombinedInfo.append(CurInfo);
10543     }
10544     // Adjust MEMBER_OF flags for the lambdas captures.
10545     MEHandler.adjustMemberOfForLambdaCaptures(
10546         LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
10547         CombinedInfo.Types);
10548     // Map any list items in a map clause that were not captures because they
10549     // weren't referenced within the construct.
10550     MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
10551 
10552     TargetDataInfo Info;
10553     // Fill up the arrays and create the arguments.
10554     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
10555     emitOffloadingArraysArgument(
10556         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
10557         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
10558         {/*ForEndTask=*/false});
10559 
10560     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10561     InputInfo.BasePointersArray =
10562         Address(Info.BasePointersArray, CGM.getPointerAlign());
10563     InputInfo.PointersArray =
10564         Address(Info.PointersArray, CGM.getPointerAlign());
10565     InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
10566     InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
10567     MapTypesArray = Info.MapTypesArray;
10568     MapNamesArray = Info.MapNamesArray;
10569     if (RequiresOuterTask)
10570       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10571     else
10572       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10573   };
10574 
10575   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10576                              CodeGenFunction &CGF, PrePostActionTy &) {
10577     if (RequiresOuterTask) {
10578       CodeGenFunction::OMPTargetDataInfo InputInfo;
10579       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10580     } else {
10581       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10582     }
10583   };
10584 
10585   // If we have a target function ID it means that we need to support
10586   // offloading, otherwise, just execute on the host. We need to execute on host
10587   // regardless of the conditional in the if clause if, e.g., the user do not
10588   // specify target triples.
10589   if (OutlinedFnID) {
10590     if (IfCond) {
10591       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10592     } else {
10593       RegionCodeGenTy ThenRCG(TargetThenGen);
10594       ThenRCG(CGF);
10595     }
10596   } else {
10597     RegionCodeGenTy ElseRCG(TargetElseGen);
10598     ElseRCG(CGF);
10599   }
10600 }
10601 
/// Recursively walk \p S looking for OpenMP target execution directives and
/// emit a device kernel for each one found, using \p ParentName as the basis
/// for the kernel's unique name.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // The (device-id, file-id, line) triple plus ParentName uniquely
    // identifies this target region across the whole compilation.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the dedicated device-function emitter for the exact
    // combined directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // All remaining directive kinds are not target execution directives, so
    // reaching here with one of them means isOpenMPTargetExecutionDirective
    // and this switch have diverged.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // Non-target executable directives cannot themselves be kernels, but their
  // associated statement may contain nested target regions.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
10752 
10753 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10754   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10755       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10756   if (!DevTy)
10757     return false;
10758   // Do not emit device_type(nohost) functions for the host.
10759   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10760     return true;
10761   // Do not emit device_type(host) functions for the device.
10762   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10763     return true;
10764   return false;
10765 }
10766 
10767 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10768   // If emitting code for the host, we do not process FD here. Instead we do
10769   // the normal code generation.
10770   if (!CGM.getLangOpts().OpenMPIsDevice) {
10771     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10772       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10773                                   CGM.getLangOpts().OpenMPIsDevice))
10774         return true;
10775     return false;
10776   }
10777 
10778   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10779   // Try to detect target regions in the function.
10780   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10781     StringRef Name = CGM.getMangledName(GD);
10782     scanForTargetRegionsFunctions(FD->getBody(), Name);
10783     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10784                                 CGM.getLangOpts().OpenMPIsDevice))
10785       return true;
10786   }
10787 
10788   // Do not to emit function if it is not marked as declare target.
10789   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10790          AlreadyEmittedTargetDecls.count(VD) == 0;
10791 }
10792 
10793 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10794   if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10795                               CGM.getLangOpts().OpenMPIsDevice))
10796     return true;
10797 
10798   if (!CGM.getLangOpts().OpenMPIsDevice)
10799     return false;
10800 
10801   // Check if there are Ctors/Dtors in this declaration and look for target
10802   // regions in it. We use the complete variant to produce the kernel name
10803   // mangling.
10804   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10805   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10806     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10807       StringRef ParentName =
10808           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10809       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10810     }
10811     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10812       StringRef ParentName =
10813           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10814       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10815     }
10816   }
10817 
10818   // Do not to emit variable if it is not marked as declare target.
10819   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10820       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10821           cast<VarDecl>(GD.getDecl()));
10822   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10823       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10824        HasRequiresUnifiedSharedMemory)) {
10825     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10826     return true;
10827   }
10828   return false;
10829 }
10830 
/// Register \p VD (at address \p Addr) with the offload entries manager so a
/// matching host/device entry pair can be produced at link time. The entry's
/// name, size, linkage, and flags depend on the declare target map type and
/// on whether unified shared memory is required.
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  // Nothing to register when no offloading targets exist and this is not a
  // device compilation.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  // If we have host/nohost variables, they do not need to be registered.
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
    return;

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    // Plain 'declare target to': register the variable itself. A size of
    // zero marks declaration-only entries.
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Do not create a "ref-variable" if the original is not also available
      // on the host.
      if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
        return;
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        // Emit an internal constant "<name>_ref" holding Addr and mark it
        // compiler-used so the optimizer cannot strip the original variable.
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    // For 'link' (or 'to' + unified memory), the registered entity is the
    // pointer-sized indirection variable, not the variable itself; on the
    // device the address is dropped entirely.
    if (CGM.getLangOpts().OpenMPIsDevice) {
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}
10912 
10913 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10914   if (isa<FunctionDecl>(GD.getDecl()) ||
10915       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10916     return emitTargetFunctions(GD);
10917 
10918   return emitTargetGlobalVariable(GD);
10919 }
10920 
10921 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10922   for (const VarDecl *VD : DeferredGlobalVariables) {
10923     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10924         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10925     if (!Res)
10926       continue;
10927     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10928         !HasRequiresUnifiedSharedMemory) {
10929       CGM.EmitGlobal(VD);
10930     } else {
10931       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10932               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10933                HasRequiresUnifiedSharedMemory)) &&
10934              "Expected link clause or to clause with unified memory.");
10935       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10936     }
10937   }
10938 }
10939 
/// Hook for target-specific adjustment of lambda captures in a target-based
/// directive. This generic-host implementation only validates the directive
/// kind; presumably device-specific runtimes override it with real work —
/// confirm against the CGOpenMPRuntime subclasses.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}
10945 
10946 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10947   for (const OMPClause *Clause : D->clauselists()) {
10948     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10949       HasRequiresUnifiedSharedMemory = true;
10950     } else if (const auto *AC =
10951                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10952       switch (AC->getAtomicDefaultMemOrderKind()) {
10953       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10954         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10955         break;
10956       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10957         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10958         break;
10959       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10960         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10961         break;
10962       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10963         break;
10964       }
10965     }
10966   }
10967 }
10968 
/// Returns the default atomic memory ordering, as recorded from any
/// 'requires atomic_default_mem_order(...)' clause by
/// processRequiresDirective.
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10972 
10973 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10974                                                        LangAS &AS) {
10975   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10976     return false;
10977   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10978   switch(A->getAllocatorType()) {
10979   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10980   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10981   // Not supported, fallback to the default mem space.
10982   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10983   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10984   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10985   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10986   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10987   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10988   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10989     AS = LangAS::Default;
10990     return true;
10991   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10992     llvm_unreachable("Expected predefined allocator for the variables with the "
10993                      "static storage.");
10994   }
10995   return false;
10996 }
10997 
/// Returns true if a 'requires unified_shared_memory' directive was seen
/// (recorded by processRequiresDirective).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
11001 
11002 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
11003     CodeGenModule &CGM)
11004     : CGM(CGM) {
11005   if (CGM.getLangOpts().OpenMPIsDevice) {
11006     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
11007     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
11008   }
11009 }
11010 
11011 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
11012   if (CGM.getLangOpts().OpenMPIsDevice)
11013     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
11014 }
11015 
11016 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
11017   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
11018     return true;
11019 
11020   const auto *D = cast<FunctionDecl>(GD.getDecl());
11021   // Do not to emit function if it is marked as declare target as it was already
11022   // emitted.
11023   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
11024     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
11025       if (auto *F = dyn_cast_or_null<llvm::Function>(
11026               CGM.GetGlobalValue(CGM.getMangledName(GD))))
11027         return !F->isDeclaration();
11028       return false;
11029     }
11030     return true;
11031   }
11032 
11033   return !AlreadyEmittedTargetDecls.insert(D).second;
11034 }
11035 
/// Create the host-side constructor-like function that registers the
/// 'requires' flags with the offload runtime via __tgt_register_requires.
/// Returns nullptr when no registration is needed.
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    // A local CGF scopes the StartFunction/FinishFunction pair for the
    // generated nullary registration function.
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    // Emit: call void __tgt_register_requires(i64 <Flags>)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
11077 
11078 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
11079                                     const OMPExecutableDirective &D,
11080                                     SourceLocation Loc,
11081                                     llvm::Function *OutlinedFn,
11082                                     ArrayRef<llvm::Value *> CapturedVars) {
11083   if (!CGF.HaveInsertPoint())
11084     return;
11085 
11086   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11087   CodeGenFunction::RunCleanupsScope Scope(CGF);
11088 
11089   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11090   llvm::Value *Args[] = {
11091       RTLoc,
11092       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11093       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
11094   llvm::SmallVector<llvm::Value *, 16> RealArgs;
11095   RealArgs.append(std::begin(Args), std::end(Args));
11096   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11097 
11098   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11099       CGM.getModule(), OMPRTL___kmpc_fork_teams);
11100   CGF.EmitRuntimeCall(RTLFn, RealArgs);
11101 }
11102 
/// Emit a call to __kmpc_push_num_teams for the 'num_teams'/'thread_limit'
/// clauses. A null clause expression is passed to the runtime as 0, meaning
/// "no user-specified value".
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  // Evaluate the clause expression (if any) and truncate/extend to i32.
  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}
11131 
/// Emit the 'target data' region: __tgt_target_data_begin_mapper, the region
/// body, and __tgt_target_data_end_mapper, honoring the optional 'if' and
/// 'device' clauses. \p Info is filled with the offloading arrays so the end
/// call can reuse them.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
    //
    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    // ForEndCall=true selects the end-of-region variants of the arrays.
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info,
                                 {/*ForEndCall=*/true});

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
11285 
// Emit the single runtime call implementing a standalone 'target enter data',
// 'target exit data', or 'target update' directive.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  // NOTE: InputInfo, MapTypesArray and MapNamesArray are captured by
  // reference; TargetThenGen (below) fills them in before this lambda is
  // invoked, either inline or from within an outer task region.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Argument order matches the __tgt_target_data_*_mapper entry points.
    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // All remaining directive kinds are rejected by the assert at function
    // entry; listing them explicitly (instead of only 'default') keeps the
    // -Wswitch warning useful when new directive kinds are added.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Materialize the offloading arrays for the map clauses, then run ThenGen
  // (possibly wrapped in a task when depend/nowait clauses are present).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    // depend/nowait clauses require the runtime call to run from a task.
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});
    // Publish the generated arrays to the captures that ThenGen reads.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // An 'if' clause that evaluates to false makes the directive a no-op.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
11465 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    // Mangling classification of the parameter; 'vector' unless a clause
    // says otherwise.
    ParamKindTy Kind = Vector;
    // For Linear: the (constant) step. For LinearWithVarStride: the position
    // of the parameter that supplies the stride.
    llvm::APSInt StrideOrArg;
    // Alignment from an 'aligned' clause (or the default SIMD alignment);
    // only mangled when non-zero.
    llvm::APSInt Alignment;
  };
} // namespace
11476 
11477 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11478                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11479   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11480   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11481   // of that clause. The VLEN value must be power of 2.
11482   // In other case the notion of the function`s "characteristic data type" (CDT)
11483   // is used to compute the vector length.
11484   // CDT is defined in the following order:
11485   //   a) For non-void function, the CDT is the return type.
11486   //   b) If the function has any non-uniform, non-linear parameters, then the
11487   //   CDT is the type of the first such parameter.
11488   //   c) If the CDT determined by a) or b) above is struct, union, or class
11489   //   type which is pass-by-value (except for the type that maps to the
11490   //   built-in complex data type), the characteristic data type is int.
11491   //   d) If none of the above three cases is applicable, the CDT is int.
11492   // The VLEN is then determined based on the CDT and the size of vector
11493   // register of that ISA for which current vector version is generated. The
11494   // VLEN is computed using the formula below:
11495   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11496   // where vector register size specified in section 3.2.1 Registers and the
11497   // Stack Frame of original AMD64 ABI document.
11498   QualType RetType = FD->getReturnType();
11499   if (RetType.isNull())
11500     return 0;
11501   ASTContext &C = FD->getASTContext();
11502   QualType CDT;
11503   if (!RetType.isNull() && !RetType->isVoidType()) {
11504     CDT = RetType;
11505   } else {
11506     unsigned Offset = 0;
11507     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11508       if (ParamAttrs[Offset].Kind == Vector)
11509         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11510       ++Offset;
11511     }
11512     if (CDT.isNull()) {
11513       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11514         if (ParamAttrs[I + Offset].Kind == Vector) {
11515           CDT = FD->getParamDecl(I)->getType();
11516           break;
11517         }
11518       }
11519     }
11520   }
11521   if (CDT.isNull())
11522     CDT = C.IntTy;
11523   CDT = CDT->getCanonicalTypeUnqualified();
11524   if (CDT->isRecordType() || CDT->isUnionType())
11525     CDT = C.IntTy;
11526   return C.getTypeSize(CDT);
11527 }
11528 
11529 static void
11530 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11531                            const llvm::APSInt &VLENVal,
11532                            ArrayRef<ParamAttrTy> ParamAttrs,
11533                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11534   struct ISADataTy {
11535     char ISA;
11536     unsigned VecRegSize;
11537   };
11538   ISADataTy ISAData[] = {
11539       {
11540           'b', 128
11541       }, // SSE
11542       {
11543           'c', 256
11544       }, // AVX
11545       {
11546           'd', 256
11547       }, // AVX2
11548       {
11549           'e', 512
11550       }, // AVX512
11551   };
11552   llvm::SmallVector<char, 2> Masked;
11553   switch (State) {
11554   case OMPDeclareSimdDeclAttr::BS_Undefined:
11555     Masked.push_back('N');
11556     Masked.push_back('M');
11557     break;
11558   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11559     Masked.push_back('N');
11560     break;
11561   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11562     Masked.push_back('M');
11563     break;
11564   }
11565   for (char Mask : Masked) {
11566     for (const ISADataTy &Data : ISAData) {
11567       SmallString<256> Buffer;
11568       llvm::raw_svector_ostream Out(Buffer);
11569       Out << "_ZGV" << Data.ISA << Mask;
11570       if (!VLENVal) {
11571         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11572         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11573         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11574       } else {
11575         Out << VLENVal;
11576       }
11577       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11578         switch (ParamAttr.Kind){
11579         case LinearWithVarStride:
11580           Out << 's' << ParamAttr.StrideOrArg;
11581           break;
11582         case Linear:
11583           Out << 'l';
11584           if (ParamAttr.StrideOrArg != 1)
11585             Out << ParamAttr.StrideOrArg;
11586           break;
11587         case Uniform:
11588           Out << 'u';
11589           break;
11590         case Vector:
11591           Out << 'v';
11592           break;
11593         }
11594         if (!!ParamAttr.Alignment)
11595           Out << 'a' << ParamAttr.Alignment;
11596       }
11597       Out << '_' << Fn->getName();
11598       Fn->addFnAttr(Out.str());
11599     }
11600   }
11601 }
11602 
// These are the functions needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11608 
11609 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11610 ///
11611 /// TODO: Need to implement the behavior for reference marked with a
11612 /// var or no linear modifiers (1.b in the section). For this, we
11613 /// need to extend ParamKindTy to support the linear modifiers.
11614 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11615   QT = QT.getCanonicalType();
11616 
11617   if (QT->isVoidType())
11618     return false;
11619 
11620   if (Kind == ParamKindTy::Uniform)
11621     return false;
11622 
11623   if (Kind == ParamKindTy::Linear)
11624     return false;
11625 
11626   // TODO: Handle linear references with modifiers
11627 
11628   if (Kind == ParamKindTy::LinearWithVarStride)
11629     return false;
11630 
11631   return true;
11632 }
11633 
11634 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11635 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11636   QT = QT.getCanonicalType();
11637   unsigned Size = C.getTypeSize(QT);
11638 
11639   // Only scalars and complex within 16 bytes wide set PVB to true.
11640   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11641     return false;
11642 
11643   if (QT->isFloatingType())
11644     return true;
11645 
11646   if (QT->isIntegerType())
11647     return true;
11648 
11649   if (QT->isPointerType())
11650     return true;
11651 
11652   // TODO: Add support for complex types (section 3.1.2, item 2).
11653 
11654   return false;
11655 }
11656 
11657 /// Computes the lane size (LS) of a return type or of an input parameter,
11658 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11659 /// TODO: Add support for references, section 3.2.1, item 1.
11660 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11661   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11662     QualType PTy = QT.getCanonicalType()->getPointeeType();
11663     if (getAArch64PBV(PTy, C))
11664       return C.getTypeSize(PTy);
11665   }
11666   if (getAArch64PBV(QT, C))
11667     return C.getTypeSize(QT);
11668 
11669   return C.getTypeSize(C.getUIntPtrType());
11670 }
11671 
11672 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11673 // signature of the scalar function, as defined in 3.2.2 of the
11674 // AAVFABI.
11675 static std::tuple<unsigned, unsigned, bool>
11676 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11677   QualType RetType = FD->getReturnType().getCanonicalType();
11678 
11679   ASTContext &C = FD->getASTContext();
11680 
11681   bool OutputBecomesInput = false;
11682 
11683   llvm::SmallVector<unsigned, 8> Sizes;
11684   if (!RetType->isVoidType()) {
11685     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11686     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11687       OutputBecomesInput = true;
11688   }
11689   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11690     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11691     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11692   }
11693 
11694   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11695   // The LS of a function parameter / return value can only be a power
11696   // of 2, starting from 8 bits, up to 128.
11697   assert(std::all_of(Sizes.begin(), Sizes.end(),
11698                      [](unsigned Size) {
11699                        return Size == 8 || Size == 16 || Size == 32 ||
11700                               Size == 64 || Size == 128;
11701                      }) &&
11702          "Invalid size");
11703 
11704   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11705                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11706                          OutputBecomesInput);
11707 }
11708 
11709 /// Mangle the parameter part of the vector function name according to
11710 /// their OpenMP classification. The mangling function is defined in
11711 /// section 3.5 of the AAVFABI.
11712 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11713   SmallString<256> Buffer;
11714   llvm::raw_svector_ostream Out(Buffer);
11715   for (const auto &ParamAttr : ParamAttrs) {
11716     switch (ParamAttr.Kind) {
11717     case LinearWithVarStride:
11718       Out << "ls" << ParamAttr.StrideOrArg;
11719       break;
11720     case Linear:
11721       Out << 'l';
11722       // Don't print the step value if it is not present or if it is
11723       // equal to 1.
11724       if (ParamAttr.StrideOrArg != 1)
11725         Out << ParamAttr.StrideOrArg;
11726       break;
11727     case Uniform:
11728       Out << 'u';
11729       break;
11730     case Vector:
11731       Out << 'v';
11732       break;
11733     }
11734 
11735     if (!!ParamAttr.Alignment)
11736       Out << 'a' << ParamAttr.Alignment;
11737   }
11738 
11739   return std::string(Out.str());
11740 }
11741 
11742 // Function used to add the attribute. The parameter `VLEN` is
11743 // templated to allow the use of "x" when targeting scalable functions
11744 // for SVE.
11745 template <typename T>
11746 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11747                                  char ISA, StringRef ParSeq,
11748                                  StringRef MangledName, bool OutputBecomesInput,
11749                                  llvm::Function *Fn) {
11750   SmallString<256> Buffer;
11751   llvm::raw_svector_ostream Out(Buffer);
11752   Out << Prefix << ISA << LMask << VLEN;
11753   if (OutputBecomesInput)
11754     Out << "v";
11755   Out << ParSeq << "_" << MangledName;
11756   Fn->addFnAttr(Out.str());
11757 }
11758 
11759 // Helper function to generate the Advanced SIMD names depending on
11760 // the value of the NDS when simdlen is not present.
11761 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11762                                       StringRef Prefix, char ISA,
11763                                       StringRef ParSeq, StringRef MangledName,
11764                                       bool OutputBecomesInput,
11765                                       llvm::Function *Fn) {
11766   switch (NDS) {
11767   case 8:
11768     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11769                          OutputBecomesInput, Fn);
11770     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11771                          OutputBecomesInput, Fn);
11772     break;
11773   case 16:
11774     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11775                          OutputBecomesInput, Fn);
11776     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11777                          OutputBecomesInput, Fn);
11778     break;
11779   case 32:
11780     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11781                          OutputBecomesInput, Fn);
11782     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11783                          OutputBecomesInput, Fn);
11784     break;
11785   case 64:
11786   case 128:
11787     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11788                          OutputBecomesInput, Fn);
11789     break;
11790   default:
11791     llvm_unreachable("Scalar type is too wide.");
11792   }
11793 }
11794 
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        // No branch-state clause: emit both the unmasked and masked variants.
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      // "x" marks a scalable (vector-length agnostic) function.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
11903 
11904 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11905                                               llvm::Function *Fn) {
11906   ASTContext &C = CGM.getContext();
11907   FD = FD->getMostRecentDecl();
11908   // Map params to their positions in function decl.
11909   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11910   if (isa<CXXMethodDecl>(FD))
11911     ParamPositions.try_emplace(FD, 0);
11912   unsigned ParamPos = ParamPositions.size();
11913   for (const ParmVarDecl *P : FD->parameters()) {
11914     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11915     ++ParamPos;
11916   }
11917   while (FD) {
11918     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11919       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11920       // Mark uniform parameters.
11921       for (const Expr *E : Attr->uniforms()) {
11922         E = E->IgnoreParenImpCasts();
11923         unsigned Pos;
11924         if (isa<CXXThisExpr>(E)) {
11925           Pos = ParamPositions[FD];
11926         } else {
11927           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11928                                 ->getCanonicalDecl();
11929           Pos = ParamPositions[PVD];
11930         }
11931         ParamAttrs[Pos].Kind = Uniform;
11932       }
11933       // Get alignment info.
11934       auto NI = Attr->alignments_begin();
11935       for (const Expr *E : Attr->aligneds()) {
11936         E = E->IgnoreParenImpCasts();
11937         unsigned Pos;
11938         QualType ParmTy;
11939         if (isa<CXXThisExpr>(E)) {
11940           Pos = ParamPositions[FD];
11941           ParmTy = E->getType();
11942         } else {
11943           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11944                                 ->getCanonicalDecl();
11945           Pos = ParamPositions[PVD];
11946           ParmTy = PVD->getType();
11947         }
11948         ParamAttrs[Pos].Alignment =
11949             (*NI)
11950                 ? (*NI)->EvaluateKnownConstInt(C)
11951                 : llvm::APSInt::getUnsigned(
11952                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11953                           .getQuantity());
11954         ++NI;
11955       }
11956       // Mark linear parameters.
11957       auto SI = Attr->steps_begin();
11958       auto MI = Attr->modifiers_begin();
11959       for (const Expr *E : Attr->linears()) {
11960         E = E->IgnoreParenImpCasts();
11961         unsigned Pos;
11962         // Rescaling factor needed to compute the linear parameter
11963         // value in the mangled name.
11964         unsigned PtrRescalingFactor = 1;
11965         if (isa<CXXThisExpr>(E)) {
11966           Pos = ParamPositions[FD];
11967         } else {
11968           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11969                                 ->getCanonicalDecl();
11970           Pos = ParamPositions[PVD];
11971           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11972             PtrRescalingFactor = CGM.getContext()
11973                                      .getTypeSizeInChars(P->getPointeeType())
11974                                      .getQuantity();
11975         }
11976         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11977         ParamAttr.Kind = Linear;
11978         // Assuming a stride of 1, for `linear` without modifiers.
11979         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11980         if (*SI) {
11981           Expr::EvalResult Result;
11982           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11983             if (const auto *DRE =
11984                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11985               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
11986                 ParamAttr.Kind = LinearWithVarStride;
11987                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
11988                     ParamPositions[StridePVD->getCanonicalDecl()]);
11989               }
11990             }
11991           } else {
11992             ParamAttr.StrideOrArg = Result.Val.getInt();
11993           }
11994         }
11995         // If we are using a linear clause on a pointer, we need to
11996         // rescale the value of linear_step with the byte size of the
11997         // pointee type.
11998         if (Linear == ParamAttr.Kind)
11999           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
12000         ++SI;
12001         ++MI;
12002       }
12003       llvm::APSInt VLENVal;
12004       SourceLocation ExprLoc;
12005       const Expr *VLENExpr = Attr->getSimdlen();
12006       if (VLENExpr) {
12007         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
12008         ExprLoc = VLENExpr->getExprLoc();
12009       }
12010       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
12011       if (CGM.getTriple().isX86()) {
12012         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
12013       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
12014         unsigned VLEN = VLENVal.getExtValue();
12015         StringRef MangledName = Fn->getName();
12016         if (CGM.getTarget().hasFeature("sve"))
12017           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12018                                          MangledName, 's', 128, Fn, ExprLoc);
12019         if (CGM.getTarget().hasFeature("neon"))
12020           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12021                                          MangledName, 'n', 128, Fn, ExprLoc);
12022       }
12023     }
12024     FD = FD->getPreviousDecl();
12025   }
12026 }
12027 
12028 namespace {
12029 /// Cleanup action for doacross support.
12030 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
12031 public:
12032   static const int DoacrossFinArgs = 2;
12033 
12034 private:
12035   llvm::FunctionCallee RTLFn;
12036   llvm::Value *Args[DoacrossFinArgs];
12037 
12038 public:
12039   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
12040                     ArrayRef<llvm::Value *> CallArgs)
12041       : RTLFn(RTLFn) {
12042     assert(CallArgs.size() == DoacrossFinArgs);
12043     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
12044   }
12045   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12046     if (!CGF.HaveInsertPoint())
12047       return;
12048     CGF.EmitRuntimeCall(RTLFn, Args);
12049   }
12050 };
12051 } // namespace
12052 
/// Emits initialization for a doacross loop nest: fills an array of
/// 'struct kmp_dim' with the per-dimension bounds/strides and calls
/// __kmpc_doacross_init, then pushes a cleanup that calls
/// __kmpc_doacross_fini on both normal and EH exits.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    // The kmp_dim record type is built once and cached in KmpDimTy.
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  // One kmp_dim entry per doacross dimension.
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // Zero-initialize the whole array; 'lo' fields stay 0.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    // The iteration count expression may have any integer type; widen/convert
    // it to kmp_int64 before storing.
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Schedule the matching __kmpc_doacross_fini at region exit.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
12123 
12124 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12125                                           const OMPDependClause *C) {
12126   QualType Int64Ty =
12127       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
12128   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
12129   QualType ArrayTy = CGM.getContext().getConstantArrayType(
12130       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
12131   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
12132   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
12133     const Expr *CounterVal = C->getLoopData(I);
12134     assert(CounterVal);
12135     llvm::Value *CntVal = CGF.EmitScalarConversion(
12136         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
12137         CounterVal->getExprLoc());
12138     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
12139                           /*Volatile=*/false, Int64Ty);
12140   }
12141   llvm::Value *Args[] = {
12142       emitUpdateLocation(CGF, C->getBeginLoc()),
12143       getThreadID(CGF, C->getBeginLoc()),
12144       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
12145   llvm::FunctionCallee RTLFn;
12146   if (C->getDependencyKind() == OMPC_DEPEND_source) {
12147     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12148                                                   OMPRTL___kmpc_doacross_post);
12149   } else {
12150     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
12151     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12152                                                   OMPRTL___kmpc_doacross_wait);
12153   }
12154   CGF.EmitRuntimeCall(RTLFn, Args);
12155 }
12156 
12157 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12158                                llvm::FunctionCallee Callee,
12159                                ArrayRef<llvm::Value *> Args) const {
12160   assert(Loc.isValid() && "Outlined function call location must be valid.");
12161   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
12162 
12163   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12164     if (Fn->doesNotThrow()) {
12165       CGF.EmitNounwindRuntimeCall(Fn, Args);
12166       return;
12167     }
12168   }
12169   CGF.EmitRuntimeCall(Callee, Args);
12170 }
12171 
/// Emits a call to an outlined OpenMP region function; forwards directly to
/// emitCall with the same location and arguments.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
12177 
12178 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12179   if (const auto *FD = dyn_cast<FunctionDecl>(D))
12180     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12181       HasEmittedDeclareTargetRegion = true;
12182 }
12183 
/// Returns the address of a captured parameter. The base implementation does
/// no native<->target translation: it simply returns the local address of
/// \p NativeParam and ignores \p TargetParam (device runtimes may differ).
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
12189 
/// Returns the address to use for local variable \p VD, handling two special
/// cases: (1) variables relocated into an untied task's storage, and
/// (2) variables with an 'omp allocate' attribute, which are allocated via
/// __kmpc_alloc and freed via a pushed __kmpc_free cleanup.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  // If the current function is an untied task body, the variable may have a
  // pair of addresses recorded: the slot to store through (first) and the
  // real address (second).
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // VLA-like types: the size is only known at run time.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      // Constant-size types: round the size up to the alignment at compile
      // time.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is a enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    // void *addr = __kmpc_alloc(gtid, size, allocator);
    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      // Location is stored as its raw encoding so the cleanup has no
      // dependency on the SourceLocation object lifetime.
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *Allocator;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        // __kmpc_free(gtid, addr, allocator);
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is a enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    // Prefer the untied task's real address when one exists.
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, AA->getAllocator());
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
12293 
12294 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12295                                              const VarDecl *VD) const {
12296   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12297   if (It == FunctionToUntiedTaskStackMap.end())
12298     return false;
12299   return UntiedLocalVarsStack[It->second].count(VD) > 0;
12300 }
12301 
12302 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12303     CodeGenModule &CGM, const OMPLoopDirective &S)
12304     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12305   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12306   if (!NeedToPush)
12307     return;
12308   NontemporalDeclsSet &DS =
12309       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12310   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12311     for (const Stmt *Ref : C->private_refs()) {
12312       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12313       const ValueDecl *VD;
12314       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12315         VD = DRE->getDecl();
12316       } else {
12317         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12318         assert((ME->isImplicitCXXThis() ||
12319                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12320                "Expected member of current class.");
12321         VD = ME->getMemberDecl();
12322       }
12323       DS.insert(VD);
12324     }
12325   }
12326 }
12327 
12328 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12329   if (!NeedToPush)
12330     return;
12331   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12332 }
12333 
12334 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12335     CodeGenFunction &CGF,
12336     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12337                           std::pair<Address, Address>> &LocalVars)
12338     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12339   if (!NeedToPush)
12340     return;
12341   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12342       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12343   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12344 }
12345 
12346 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12347   if (!NeedToPush)
12348     return;
12349   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12350 }
12351 
12352 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12353   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12354 
12355   return llvm::any_of(
12356       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12357       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
12358 }
12359 
12360 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12361     const OMPExecutableDirective &S,
12362     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12363     const {
12364   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12365   // Vars in target/task regions must be excluded completely.
12366   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12367       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12368     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12369     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12370     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12371     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12372       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12373         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12374     }
12375   }
12376   // Exclude vars in private clauses.
12377   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12378     for (const Expr *Ref : C->varlists()) {
12379       if (!Ref->getType()->isScalarType())
12380         continue;
12381       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12382       if (!DRE)
12383         continue;
12384       NeedToCheckForLPCs.insert(DRE->getDecl());
12385     }
12386   }
12387   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12388     for (const Expr *Ref : C->varlists()) {
12389       if (!Ref->getType()->isScalarType())
12390         continue;
12391       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12392       if (!DRE)
12393         continue;
12394       NeedToCheckForLPCs.insert(DRE->getDecl());
12395     }
12396   }
12397   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12398     for (const Expr *Ref : C->varlists()) {
12399       if (!Ref->getType()->isScalarType())
12400         continue;
12401       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12402       if (!DRE)
12403         continue;
12404       NeedToCheckForLPCs.insert(DRE->getDecl());
12405     }
12406   }
12407   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12408     for (const Expr *Ref : C->varlists()) {
12409       if (!Ref->getType()->isScalarType())
12410         continue;
12411       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12412       if (!DRE)
12413         continue;
12414       NeedToCheckForLPCs.insert(DRE->getDecl());
12415     }
12416   }
12417   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12418     for (const Expr *Ref : C->varlists()) {
12419       if (!Ref->getType()->isScalarType())
12420         continue;
12421       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12422       if (!DRE)
12423         continue;
12424       NeedToCheckForLPCs.insert(DRE->getDecl());
12425     }
12426   }
12427   for (const Decl *VD : NeedToCheckForLPCs) {
12428     for (const LastprivateConditionalData &Data :
12429          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12430       if (Data.DeclToUniqueName.count(VD) > 0) {
12431         if (!Data.Disabled)
12432           NeedToAddForLPCsAsDisabled.insert(VD);
12433         break;
12434       }
12435     }
12436   }
12437 }
12438 
/// Pushes a new lastprivate-conditional scope if OpenMP >= 5.0 and the
/// directive has at least one 'lastprivate(conditional: ...)' clause;
/// otherwise records that nothing needs popping.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    // Map each conditional lastprivate decl to a unique mangled name used
    // for the internal globals created during the update.
    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  // Remember the loop iteration variable and owning function for later
  // comparison during the conditional update.
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
12470 
12471 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12472     CodeGenFunction &CGF, const OMPExecutableDirective &S)
12473     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12474   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12475   if (CGM.getLangOpts().OpenMP < 50)
12476     return;
12477   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12478   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12479   if (!NeedToAddForLPCsAsDisabled.empty()) {
12480     Action = ActionToDo::DisableLastprivateConditional;
12481     LastprivateConditionalData &Data =
12482         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12483     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12484       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12485     Data.Fn = CGF.CurFn;
12486     Data.Disabled = true;
12487   }
12488 }
12489 
/// Factory for the "disable" form of the RAII: suppresses lastprivate
/// conditional analysis inside the region of \p S.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12495 
12496 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12497   if (CGM.getLangOpts().OpenMP < 50)
12498     return;
12499   if (Action == ActionToDo::DisableLastprivateConditional) {
12500     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12501            "Expected list of disabled private vars.");
12502     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12503   }
12504   if (Action == ActionToDo::PushAsLastprivateConditional) {
12505     assert(
12506         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12507         "Expected list of lastprivate conditional vars.");
12508     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12509   }
12510 }
12511 
/// Allocates (or reuses) the per-function wrapper struct
/// { <VD's value type>, char Fired } for a lastprivate conditional variable,
/// resets the Fired flag to 0, and returns the address of the value field.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use of VD in this function: build the wrapper record and a stack
    // temporary for it, then cache everything.
    // NOTE(review): "lasprivate" looks like a typo for "lastprivate", but
    // the spelling is part of the emitted record name — confirm before
    // changing.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Cached entry: unpack (type, value field, fired field, base lvalue).
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Fired = 0;
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
12546 
namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  /// Active lastprivate-conditional scopes, outermost first.
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  /// Expression that referenced a tracked declaration, if any was found.
  const Expr *FoundE = nullptr;
  /// Canonical declaration referenced by FoundE.
  const Decl *FoundD = nullptr;
  /// Unique mangled name registered for the found declaration.
  StringRef UniqueDeclName;
  /// Loop iteration variable of the scope the declaration belongs to.
  LValue IVLVal;
  /// Function in which the scope was pushed.
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    // Search scopes innermost-first; a disabled scope that mentions the decl
    // suppresses the match entirely.
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    // Only members of the current class ('this->x' or implicit) can be
    // lastprivate conditionals.
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    // Recurse into children, but only into glvalue subexpressions — rvalue
    // uses cannot be the assigned-to reference we are looking for.
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  /// Returns (expr, decl, unique name, iteration-variable lvalue, function)
  /// for the reference found by the last Visit, if any.
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace
12617 
/// Emits the update of a lastprivate conditional variable: inside a critical
/// region (or unguarded in simd-only mode), compares the current iteration
/// against a global "last seen" iteration and, if not earlier, stores both
/// the iteration number and the private value into internal globals named
/// after \p UniqueDeclName.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    // Serialize updates from concurrent threads via a named critical region.
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
12704 
/// Checks whether the expression \p LHS references a registered lastprivate
/// conditional variable and, if so, emits the bookkeeping for it: either the
/// conditional update of the global copy (when the variable belongs to the
/// current function) or an atomic store to the variable's 'Fired' flag (when
/// the update happens inside an inner parallel region).
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  // Lastprivate conditional is an OpenMP 5.0 feature; nothing to do if no
  // lastprivate conditionals are registered in the current scope.
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  // The checker found a reference to a lastprivate conditional variable;
  // unpack the data it recorded about the match.
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    // Reinterpret the private copy's address as the wrapper struct type that
    // carries the 'Fired' flag next to the value.
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    // Atomically mark the variable as updated; the enclosing region checks
    // this flag later (see checkAndEmitSharedLastprivateConditional).
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}
12747 
12748 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12749     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12750     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12751   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12752     return;
12753   auto Range = llvm::reverse(LastprivateConditionalStack);
12754   auto It = llvm::find_if(
12755       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12756   if (It == Range.end() || It->Fn != CGF.CurFn)
12757     return;
12758   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12759   assert(LPCI != LastprivateConditionalToTypes.end() &&
12760          "Lastprivates must be registered already.");
12761   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12762   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12763   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12764   for (const auto &Pair : It->DeclToUniqueName) {
12765     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12766     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
12767       continue;
12768     auto I = LPCI->getSecond().find(Pair.first);
12769     assert(I != LPCI->getSecond().end() &&
12770            "Lastprivate must be rehistered already.");
12771     // bool Cmp = priv_a.Fired != 0;
12772     LValue BaseLVal = std::get<3>(I->getSecond());
12773     LValue FiredLVal =
12774         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12775     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12776     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12777     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12778     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12779     // if (Cmp) {
12780     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12781     CGF.EmitBlock(ThenBB);
12782     Address Addr = CGF.GetAddrOfLocalVar(VD);
12783     LValue LVal;
12784     if (VD->getType()->isReferenceType())
12785       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12786                                            AlignmentSource::Decl);
12787     else
12788       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12789                                 AlignmentSource::Decl);
12790     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12791                                      D.getBeginLoc());
12792     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12793     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12794     // }
12795   }
12796 }
12797 
/// Final copy-out for a lastprivate conditional: loads the value from the
/// uniquely-named global that tracks the "last" value of \p VD and stores it
/// into the private copy \p PrivLVal. If the global was never created, the
/// variable was never updated in the region and nothing is emitted.
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  // Lastprivate conditional is an OpenMP 5.0 feature.
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  // Reuse the private copy's alignment for the global's lvalue.
  LValue LPLVal = CGF.MakeAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
12816 
//===----------------------------------------------------------------------===//
// CGOpenMPSIMDRuntime overrides.
// In SIMD-only mode each of the following entry points traps with
// llvm_unreachable: the corresponding non-simd OpenMP constructs are
// presumably diagnosed or dropped earlier in the frontend for this mode, so
// reaching one of these is a compiler bug -- verify against the simd-only
// handling in Sema/ParseOpenMP.
//===----------------------------------------------------------------------===//

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12950 
// Threadprivate, flush and tasking entry points of CGOpenMPSIMDRuntime: none
// of these constructs are supported in SIMD-only mode, so each override traps
// with llvm_unreachable.

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12991 
/// Reductions are the one construct handled in SIMD-only mode: only the
/// simple (non-runtime) reduction form is allowed here, and it is delegated
/// to the base-class implementation.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
13000 
// Task-reduction, cancellation and target entry points of
// CGOpenMPSIMDRuntime: unsupported in SIMD-only mode, so each override traps
// with llvm_unreachable (the two emitTarget* predicates below that return
// normally are documented inline).

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13068 
/// Always reports \p GD as not handled by the target machinery in SIMD-only
/// mode. NOTE(review): presumably a false return makes the caller fall back
/// to regular (host) emission of the global -- confirm against the base
/// CGOpenMPRuntime::emitTargetGlobal contract.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
13072 
// Teams, target-data, doacross and parameter-translation entry points of
// CGOpenMPSIMDRuntime: unsupported in SIMD-only mode, so each override traps
// with llvm_unreachable.

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13123