1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/APValue.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/AST/Decl.h"
21 #include "clang/AST/OpenMPClause.h"
22 #include "clang/AST/StmtOpenMP.h"
23 #include "clang/AST/StmtVisitor.h"
24 #include "clang/Basic/BitmaskEnum.h"
25 #include "clang/Basic/FileManager.h"
26 #include "clang/Basic/OpenMPKinds.h"
27 #include "clang/Basic/SourceManager.h"
28 #include "clang/CodeGen/ConstantInitBuilder.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/SetOperations.h"
31 #include "llvm/ADT/StringExtras.h"
32 #include "llvm/Bitcode/BitcodeReader.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/IR/Value.h"
37 #include "llvm/Support/AtomicOrdering.h"
38 #include "llvm/Support/Format.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include <cassert>
41 #include <numeric>
42 
43 using namespace clang;
44 using namespace CodeGen;
45 using namespace llvm::omp;
46 
47 namespace {
48 /// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Create region info backed by the captured statement \p CS.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Create region info with no associated CapturedStmt (used for inlined
  /// regions that reuse the captures of an enclosing region).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task re-entry point for untied tasks; no-op by default, overridden
  /// by region kinds that support untied task switching.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Kind of this region (outlined/task/inlined/target).
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// OpenMP directive that produced this region.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// True if the region may contain a 'cancel' construct.
  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI: any CGCapturedStmtInfo with kind CR_OpenMP is a
  /// CGOpenMPRegionInfo.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
109 
/// API for captured statement code generation in OpenMP constructs with
/// an outlined function ('parallel'-style regions).
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param ThreadIDVar Variable holding the global thread id; must be
  /// non-null (asserted below).
  /// \param HelperName Name used for the emitted outlined helper function.
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// LLVM-style RTTI: an OpenMP region of kind ParallelOutlinedRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name for the emitted outlined helper function.
  StringRef HelperName;
};
142 
/// API for captured statement code generation in OpenMP 'task'-style
/// outlined constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action implementing switching for untied tasks: it maintains a
  /// switch over the task part id so that emitted code can jump to the
  /// continuation recorded for the stored part id.
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    /// Parameter holding a pointer to the current task part id.
    const VarDecl *PartIDVar;
    /// Extra codegen run at each scheduling point (see emitUntiedSwitch).
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // Switch over the loaded part id; values without a case branch to
        // the 'done' block, which exits through the cleanups to the return
        // block.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Part id 0 resumes at the start of the task body.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one scheduling point: store the next part id, run the untied
    /// codegen hook, branch out via the return block, and register the
    /// continuation block as a new case of the switch.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // The next case index doubles as the new part id value.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (== number of switch cases).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Delegate untied-task switching to the associated action.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  /// LLVM-style RTTI: an OpenMP region of kind TaskOutlinedRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
231 
232 /// API for inlined captured statement code generation in OpenMP
233 /// constructs.
234 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
235 public:
236   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
237                             const RegionCodeGenTy &CodeGen,
238                             OpenMPDirectiveKind Kind, bool HasCancel)
239       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
240         OldCSI(OldCSI),
241         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
242 
243   // Retrieve the value of the context parameter.
244   llvm::Value *getContextValue() const override {
245     if (OuterRegionInfo)
246       return OuterRegionInfo->getContextValue();
247     llvm_unreachable("No context value for inlined OpenMP region");
248   }
249 
250   void setContextValue(llvm::Value *V) override {
251     if (OuterRegionInfo) {
252       OuterRegionInfo->setContextValue(V);
253       return;
254     }
255     llvm_unreachable("No context value for inlined OpenMP region");
256   }
257 
258   /// Lookup the captured field decl for a variable.
259   const FieldDecl *lookup(const VarDecl *VD) const override {
260     if (OuterRegionInfo)
261       return OuterRegionInfo->lookup(VD);
262     // If there is no outer outlined region,no need to lookup in a list of
263     // captured variables, we can use the original one.
264     return nullptr;
265   }
266 
267   FieldDecl *getThisFieldDecl() const override {
268     if (OuterRegionInfo)
269       return OuterRegionInfo->getThisFieldDecl();
270     return nullptr;
271   }
272 
273   /// Get a variable or parameter for storing global thread id
274   /// inside OpenMP construct.
275   const VarDecl *getThreadIDVariable() const override {
276     if (OuterRegionInfo)
277       return OuterRegionInfo->getThreadIDVariable();
278     return nullptr;
279   }
280 
281   /// Get an LValue for the current ThreadID variable.
282   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
283     if (OuterRegionInfo)
284       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
285     llvm_unreachable("No LValue for inlined OpenMP construct");
286   }
287 
288   /// Get the name of the capture helper.
289   StringRef getHelperName() const override {
290     if (auto *OuterRegionInfo = getOldCSI())
291       return OuterRegionInfo->getHelperName();
292     llvm_unreachable("No helper name for inlined OpenMP construct");
293   }
294 
295   void emitUntiedSwitch(CodeGenFunction &CGF) override {
296     if (OuterRegionInfo)
297       OuterRegionInfo->emitUntiedSwitch(CGF);
298   }
299 
300   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
301 
302   static bool classof(const CGCapturedStmtInfo *Info) {
303     return CGOpenMPRegionInfo::classof(Info) &&
304            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
305   }
306 
307   ~CGOpenMPInlinedRegionInfo() override = default;
308 
309 private:
310   /// CodeGen info about outer OpenMP region.
311   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
312   CGOpenMPRegionInfo *OuterRegionInfo;
313 };
314 
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of
/// the captured fields. The name of the target region has to be unique in a
/// given application so it is provided by the client, because only the client
/// has the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param HelperName Unique, client-provided name for the target region.
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// LLVM-style RTTI: an OpenMP region of kind TargetRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Client-provided unique name of the target region.
  StringRef HelperName;
};
343 
/// Placeholder RegionCodeGenTy callback for regions that must never emit a
/// body (used by CGOpenMPInnerExprInfo); reaching it is a bug.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
347 /// API for generation of expressions captured in a innermost OpenMP
348 /// region.
349 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
350 public:
351   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
352       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
353                                   OMPD_unknown,
354                                   /*HasCancel=*/false),
355         PrivScope(CGF) {
356     // Make sure the globals captured in the provided statement are local by
357     // using the privatization logic. We assume the same variable is not
358     // captured more than once.
359     for (const auto &C : CS.captures()) {
360       if (!C.capturesVariable() && !C.capturesVariableByCopy())
361         continue;
362 
363       const VarDecl *VD = C.getCapturedVar();
364       if (VD->isLocalVarDeclOrParm())
365         continue;
366 
367       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
368                       /*RefersToEnclosingVariableOrCapture=*/false,
369                       VD->getType().getNonReferenceType(), VK_LValue,
370                       C.getLocation());
371       PrivScope.addPrivate(
372           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
373     }
374     (void)PrivScope.Privatize();
375   }
376 
377   /// Lookup the captured field decl for a variable.
378   const FieldDecl *lookup(const VarDecl *VD) const override {
379     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
380       return FD;
381     return nullptr;
382   }
383 
384   /// Emit the captured statement body.
385   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
386     llvm_unreachable("No body for expressions");
387   }
388 
389   /// Get a variable or parameter for storing global thread id
390   /// inside OpenMP construct.
391   const VarDecl *getThreadIDVariable() const override {
392     llvm_unreachable("No thread id for expressions");
393   }
394 
395   /// Get the name of the capture helper.
396   StringRef getHelperName() const override {
397     llvm_unreachable("No helper name for expressions");
398   }
399 
400   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
401 
402 private:
403   /// Private scope to capture global variables.
404   CodeGenFunction::OMPPrivateScope PrivScope;
405 };
406 
/// RAII for emitting code of OpenMP constructs: installs a
/// CGOpenMPInlinedRegionInfo on the CodeGenFunction for its lifetime and
/// restores the previous CapturedStmtInfo on destruction.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  // State stashed away from CGF while NoInheritance is in effect; put back
  // by the destructor.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  /// \param NoInheritance If true, the lambda-capture maps and block info of
  /// \p CGF are cleared for the duration of the region and restored on exit.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      // Move lambda/block capture state aside so the inlined region does not
      // see it.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      // Put the stashed lambda/block capture state back.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
449 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (shares the value 0x40 with
  /// OMP_IDENT_BARRIER_IMPL, matching kmp.h).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
478 
namespace {
// Enable bitwise operators for the bitmask enums declared in this namespace.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Reserved device ids passed to the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
504 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
545 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers
  /// (high bits, distinct from the schedule values above).
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
577 
578 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
579 /// region.
580 class CleanupTy final : public EHScopeStack::Cleanup {
581   PrePostActionTy *Action;
582 
583 public:
584   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
585   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
586     if (!CGF.HaveInsertPoint())
587       return;
588     Action->Exit(CGF);
589   }
590 };
591 
592 } // anonymous namespace
593 
// Run the stored codegen callback inside its own cleanups scope. If a
// pre/post action is registered, its Exit() part is pushed as a
// normal-and-EH cleanup BEFORE the callback runs, so it fires however the
// region is left.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    // No registered action: hand the callback a default no-op one.
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
604 
605 /// Check if the combiner is a call to UDR combiner and if it is so return the
606 /// UDR decl used for reduction.
607 static const OMPDeclareReductionDecl *
608 getReductionInit(const Expr *ReductionOp) {
609   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
610     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
611       if (const auto *DRE =
612               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
613         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
614           return DRD;
615   return nullptr;
616 }
617 
/// Emit initialization of the reduction private copy \p Private from
/// \p Original: if \p DRD has an initializer, call it; otherwise copy a
/// null constant of type \p Ty into \p Private.
/// \param InitOp UDR initializer expression (a call through an opaque
/// callee taking the LHS/RHS placeholder decls as arguments).
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // The UDR has an initializer: bind the LHS placeholder to the private
    // copy and the RHS placeholder to the original variable, then emit the
    // call with the initializer function substituted for the opaque callee.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No initializer: materialize a private constant global holding the null
    // value and copy it into the private storage.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      // Aggregates are copied directly through an lvalue opaque value;
      // no intermediate rvalue is formed, hence the early return.
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
673 
/// Emit element-by-element initialization of an array.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, initialize each element via the
/// declare-reduction initializer (emitInitWithReductionInitializer) instead
/// of the plain \p Init expression.
/// \param Init Initial expression for each element.
/// \param DRD Declare-reduction decl used for initialization, if any; when
/// non-null, \p SrcAddr must be the address of the original array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Zero-length arrays skip the loop entirely.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element across iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // (Note: the IR value name says "dest" but this advances the source
    // pointer.)
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
765 
766 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
767   return CGF.EmitOMPSharedLValue(E);
768 }
769 
770 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
771                                             const Expr *E) {
772   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
773     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
774   return LValue();
775 }
776 
777 void ReductionCodeGen::emitAggregateInitialization(
778     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
779     const OMPDeclareReductionDecl *DRD) {
780   // Emit VarDecl with copy init for arrays.
781   // Get the address of the original variable captured in current
782   // captured region.
783   const auto *PrivateVD =
784       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
785   bool EmitDeclareReductionInit =
786       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
787   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
788                        EmitDeclareReductionInit,
789                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
790                                                 : PrivateVD->getInit(),
791                        DRD, SharedLVal.getAddress(CGF));
792 }
793 
794 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
795                                    ArrayRef<const Expr *> Origs,
796                                    ArrayRef<const Expr *> Privates,
797                                    ArrayRef<const Expr *> ReductionOps) {
798   ClausesData.reserve(Shareds.size());
799   SharedAddresses.reserve(Shareds.size());
800   Sizes.reserve(Shareds.size());
801   BaseDecls.reserve(Shareds.size());
802   const auto *IOrig = Origs.begin();
803   const auto *IPriv = Privates.begin();
804   const auto *IRed = ReductionOps.begin();
805   for (const Expr *Ref : Shareds) {
806     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
807     std::advance(IOrig, 1);
808     std::advance(IPriv, 1);
809     std::advance(IRed, 1);
810   }
811 }
812 
813 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
814   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
815          "Number of generated lvalues must be exactly N.");
816   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
817   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
818   SharedAddresses.emplace_back(First, Second);
819   if (ClausesData[N].Shared == ClausesData[N].Ref) {
820     OrigAddresses.emplace_back(First, Second);
821   } else {
822     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
823     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
824     OrigAddresses.emplace_back(First, Second);
825   }
826 }
827 
/// Computes and records the size of the N-th reduction item in Sizes. For
/// non-VLA types only the size in chars is stored; for variably-modified
/// types both the char size and the element count are computed, and the VLA
/// type is re-emitted with that count bound to its size expression.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-size item: the type itself determines the size; no element
    // count is needed.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  // NOTE(review): relies on typed pointers (getElementType() on a
  // PointerType); this pattern predates opaque pointers.
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (UB - LB) + 1, derived from the section's bound
    // lvalues; size in chars follows by multiplying with sizeof(element).
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Plain VLA: total size comes from the type; divide to get the count.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the computed count to the VLA's size expression while re-emitting
  // the variably-modified type so later uses see the right dimensions.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
864 
/// Re-emits the N-th reduction item's variably-modified type using an
/// externally supplied element count \p Size (e.g. one loaded from a task
/// descriptor). No-op for constant-size types, where \p Size must be null.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Map the VLA's size expression to the given count for the duration of the
  // type re-emission (OpaqueValueMapping is scoped RAII).
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
883 
/// Emits the initializer for the N-th private reduction copy, choosing
/// between aggregate init, a user-defined reduction initializer, and the
/// private variable's own initializer. \p DefaultInit is invoked first and
/// may fully handle initialization (it returns true in that case).
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Retype both addresses to the memory representation of their respective
  // types before emitting any stores.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array: element-wise initialization loop.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar with a declare-reduction initializer (or no private init).
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private variable's own non-trivial initializer when
    // DefaultInit did not already take care of it.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
917 
918 bool ReductionCodeGen::needCleanups(unsigned N) {
919   const auto *PrivateVD =
920       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
921   QualType PrivateType = PrivateVD->getType();
922   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
923   return DTorKind != QualType::DK_none;
924 }
925 
926 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
927                                     Address PrivateAddr) {
928   const auto *PrivateVD =
929       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
930   QualType PrivateType = PrivateVD->getType();
931   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
932   if (needCleanups(N)) {
933     PrivateAddr = CGF.Builder.CreateElementBitCast(
934         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
935     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
936   }
937 }
938 
939 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
940                           LValue BaseLV) {
941   BaseTy = BaseTy.getNonReferenceType();
942   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
943          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
944     if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
945       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
946     } else {
947       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
948       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
949     }
950     BaseTy = BaseTy->getPointeeType();
951   }
952   return CGF.MakeAddrLValue(
953       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
954                                        CGF.ConvertTypeForMem(ElTy)),
955       BaseLV.getType(), BaseLV.getBaseInfo(),
956       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
957 }
958 
/// Rebuilds the chain of pointer indirections described by \p BaseTy around
/// the raw pointer \p Addr: for each pointer/reference level a memory
/// temporary is created, each temporary stores the address of the next one,
/// and \p Addr (cast to the innermost type) is stored at the bottom. Returns
/// the outermost temporary, or \p Addr itself when no indirection exists.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();      // innermost temporary so far
  Address TopTmp = Address::invalid();   // previous level's temporary
  Address MostTopTmp = Address::invalid(); // outermost temporary (returned)
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      // Link the previous level to this one.
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      // First iteration: remember the outermost temporary.
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the adjusted address into the innermost temporary and hand back
    // the head of the chain.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
986 
987 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
988   const VarDecl *OrigVD = nullptr;
989   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
990     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
991     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
992       Base = TempOASE->getBase()->IgnoreParenImpCasts();
993     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
994       Base = TempASE->getBase()->IgnoreParenImpCasts();
995     DE = cast<DeclRefExpr>(Base);
996     OrigVD = cast<VarDecl>(DE->getDecl());
997   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
998     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
999     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1000       Base = TempASE->getBase()->IgnoreParenImpCasts();
1001     DE = cast<DeclRefExpr>(Base);
1002     OrigVD = cast<VarDecl>(DE->getDecl());
1003   }
1004   return OrigVD;
1005 }
1006 
/// Adjusts \p PrivateAddr for reduction items expressed as array sections or
/// subscripts of a base variable: the private copy must be addressed with the
/// same offset from its base as the shared item has from the original base.
/// For plain variables the address is returned unchanged. Also records the
/// base VarDecl for item N in BaseDecls.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    // Resolve pointer/reference indirections down to the element type.
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    // Offset (in elements) of the original base from the shared item; note it
    // is negative when the base precedes the item.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    // Apply the same offset to the private copy.
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    // Re-wrap the adjusted pointer in the indirection structure the original
    // base type expects.
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1033 
1034 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1035   const OMPDeclareReductionDecl *DRD =
1036       getReductionInit(ClausesData[N].ReductionOp);
1037   return DRD && DRD->getInitializer();
1038 }
1039 
1040 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1041   return CGF.EmitLoadOfPointerLValue(
1042       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1043       getThreadIDVariable()->getType()->castAs<PointerType>());
1044 }
1045 
/// Emits the body of an OpenMP region, wrapping the code generation in a
/// terminate scope so exceptions cannot escape the structured block.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  // Run the region's code-generation callback inside the terminate scope.
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1060 
1061 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1062     CodeGenFunction &CGF) {
1063   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1064                             getThreadIDVariable()->getType(),
1065                             AlignmentSource::Decl);
1066 }
1067 
1068 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1069                                        QualType FieldTy) {
1070   auto *Field = FieldDecl::Create(
1071       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1072       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1073       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1074   Field->setAccess(AS_public);
1075   DC->addDecl(Field);
1076   return Field;
1077 }
1078 
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // Critical-section names are lowered as [8 x i32] arrays; presumably this
  // matches the runtime's kmp_critical_name layout — TODO confirm.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  // Populate offload entry info before any target code is emitted.
  loadOffloadInfoMetadata();
}
1089 
1090 void CGOpenMPRuntime::clear() {
1091   InternalVars.clear();
1092   // Clean non-target variable declarations possibly used only in debug info.
1093   for (const auto &Data : EmittedNonTargetVariables) {
1094     if (!Data.getValue().pointsToAliveValue())
1095       continue;
1096     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1097     if (!GV)
1098       continue;
1099     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1100       continue;
1101     GV->eraseFromParent();
1102   }
1103 }
1104 
1105 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1106   SmallString<128> Buffer;
1107   llvm::raw_svector_ostream OS(Buffer);
1108   StringRef Sep = FirstSeparator;
1109   for (StringRef Part : Parts) {
1110     OS << Sep << Part;
1111     Sep = Separator;
1112   }
1113   return std::string(OS.str());
1114 }
1115 
/// Emits the outlined combiner or initializer function for a user-defined
/// reduction: `void .omp_combiner.(Ty *omp_out, Ty *omp_in)` (or
/// `.omp_initializer.`). \p In and \p Out are the declaration's omp_in /
/// omp_out (or orig/priv) variables; \p CombinerInitializer is the expression
/// to emit, or null for a direct-init initializer handled via Out's own init.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // These tiny helpers are always profitable to inline when optimizing.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For an initializer with no explicit init expression, run Out's own
  // non-trivial initializer into the privatized storage.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1172 
/// Emits (once per declaration) the combiner and optional initializer
/// functions for a user-defined reduction and caches them in UDRMap. When
/// called inside a function (\p CGF non-null), the declaration is also
/// recorded against that function in FunctionUDRMap.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // For direct init the init expression is handled through the priv
    // variable's own initializer, so pass null here.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1198 
1199 std::pair<llvm::Function *, llvm::Function *>
1200 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1201   auto I = UDRMap.find(D);
1202   if (I != UDRMap.end())
1203     return I->second;
1204   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1205   return UDRMap.lookup(D);
1206 }
1207 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  /// Pushes a finalization callback for directive \p Kind onto the
  /// OpenMPIRBuilder's stack (no-op when \p OMPBuilder is null); the
  /// destructor pops it again.
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    // CGF is captured by reference: the callback may only fire while the
    // enclosing CodeGenFunction is alive.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1252 
/// Outlines the captured statement of a parallel or teams directive into a
/// function whose first parameter is the kmp_int32* thread id variable.
/// Determines cancellation support from the concrete directive kind so the
/// outlined region can emit cancellation barriers.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Only these directive kinds can carry a 'cancel' for a parallel region.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}
1289 
1290 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1291     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1292     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1293   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1294   return emitParallelOrTeamsOutlinedFunction(
1295       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1296 }
1297 
1298 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1299     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1300     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1301   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1302   return emitParallelOrTeamsOutlinedFunction(
1303       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1304 }
1305 
/// Outlines the captured statement of a task or taskloop directive. For
/// untied tasks, installs an action that re-enqueues the task via
/// __kmpc_omp_task and reports the number of generated task parts through
/// \p NumberOfParts.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Codegen callback that re-schedules the current (untied) task by calling
  // __kmpc_omp_task(loc, tid, task_t*).
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Only these task directive kinds can carry a 'cancel'.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // The part count is only meaningful (and only tracked) for untied tasks.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1352 
1353 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1354                              const RecordDecl *RD, const CGRecordLayout &RL,
1355                              ArrayRef<llvm::Constant *> Data) {
1356   llvm::StructType *StructTy = RL.getLLVMType();
1357   unsigned PrevIdx = 0;
1358   ConstantInitBuilder CIBuilder(CGM);
1359   auto DI = Data.begin();
1360   for (const FieldDecl *FD : RD->fields()) {
1361     unsigned Idx = RL.getLLVMFieldNo(FD);
1362     // Fill the alignment.
1363     for (unsigned I = PrevIdx; I < Idx; ++I)
1364       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1365     PrevIdx = Idx + 1;
1366     Fields.add(*DI);
1367     ++DI;
1368   }
1369 }
1370 
/// Creates a global variable of record type \p Ty initialized from \p Data
/// (one constant per field, padding filled with nulls). Extra arguments
/// \p Args are forwarded to ConstantStructBuilder::finishAndCreateGlobal
/// (e.g. linkage).
template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}
1385 
/// Builds a constant struct of record type \p Ty from \p Data and appends it
/// to \p Parent (an aggregate builder), instead of creating a standalone
/// global as createGlobalStruct does.
template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}
1397 
/// Records a service insert point for the current function: a dead bitcast
/// of undef that merely marks where location/thread-id setup instructions
/// should later be inserted. The marker is erased again by
/// clearLocThreadIdInsertPt.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    // Drop the marker at the builder's current position.
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    // Otherwise place it right after the function's alloca insert point so
    // the service instructions land at the top of the function.
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}
1413 
1414 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1415   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1416   if (Elem.second.ServiceInsertPt) {
1417     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1418     Elem.second.ServiceInsertPt = nullptr;
1419     Ptr->eraseFromParent();
1420   }
1421 }
1422 
1423 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1424                                                   SourceLocation Loc,
1425                                                   SmallString<128> &Buffer) {
1426   llvm::raw_svector_ostream OS(Buffer);
1427   // Build debug location
1428   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1429   OS << ";" << PLoc.getFilename() << ";";
1430   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1431     OS << FD->getQualifiedNameAsString();
1432   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1433   return OS.str();
1434 }
1435 
1436 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1437                                                  SourceLocation Loc,
1438                                                  unsigned Flags) {
1439   llvm::Constant *SrcLocStr;
1440   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1441       Loc.isInvalid()) {
1442     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
1443   } else {
1444     std::string FunctionName = "";
1445     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1446       FunctionName = FD->getQualifiedNameAsString();
1447     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1448     const char *FileName = PLoc.getFilename();
1449     unsigned Line = PLoc.getLine();
1450     unsigned Column = PLoc.getColumn();
1451     SrcLocStr =
1452         OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line, Column);
1453   }
1454   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1455   return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
1456                                      Reserved2Flags);
1457 }
1458 
/// Return the OpenMP thread id for the current function, emitting a
/// __kmpc_global_thread_num call (at the service insertion point) only when
/// it cannot be obtained more cheaply: from the per-function cache, or from
/// the thread-id argument of an outlined region.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Loading the parameter is only safe if the load cannot be skipped by
      // exceptional control flow: either EH is off, or both the load's source
      // and the current insertion point sit in the entry ("top") block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the runtime call at the service insertion point (entry block) so the
  // cached value dominates all later uses; the guard restores the builder.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1526 
1527 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1528   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1529   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1530     clearLocThreadIdInsertPt(CGF);
1531     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1532   }
1533   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1534     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1535       UDRMap.erase(D);
1536     FunctionUDRMap.erase(CGF.CurFn);
1537   }
1538   auto I = FunctionUDMMap.find(CGF.CurFn);
1539   if (I != FunctionUDMMap.end()) {
1540     for(const auto *D : I->second)
1541       UDMMap.erase(D);
1542     FunctionUDMMap.erase(I);
1543   }
1544   LastprivateConditionalToTypes.erase(CGF.CurFn);
1545   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1546 }
1547 
/// Return the ident_t* type, as cached by the OpenMPIRBuilder.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}
1551 
1552 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1553   if (!Kmpc_MicroTy) {
1554     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1555     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1556                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1557     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1558   }
1559   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1560 }
1561 
1562 llvm::FunctionCallee
1563 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1564                                              bool IsGPUDistribute) {
1565   assert((IVSize == 32 || IVSize == 64) &&
1566          "IV size is not compatible with the omp runtime");
1567   StringRef Name;
1568   if (IsGPUDistribute)
1569     Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1570                                     : "__kmpc_distribute_static_init_4u")
1571                         : (IVSigned ? "__kmpc_distribute_static_init_8"
1572                                     : "__kmpc_distribute_static_init_8u");
1573   else
1574     Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1575                                     : "__kmpc_for_static_init_4u")
1576                         : (IVSigned ? "__kmpc_for_static_init_8"
1577                                     : "__kmpc_for_static_init_8u");
1578 
1579   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1580   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1581   llvm::Type *TypeParams[] = {
1582     getIdentTyPointerTy(),                     // loc
1583     CGM.Int32Ty,                               // tid
1584     CGM.Int32Ty,                               // schedtype
1585     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1586     PtrTy,                                     // p_lower
1587     PtrTy,                                     // p_upper
1588     PtrTy,                                     // p_stride
1589     ITy,                                       // incr
1590     ITy                                        // chunk
1591   };
1592   auto *FnTy =
1593       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1594   return CGM.CreateRuntimeFunction(FnTy, Name);
1595 }
1596 
1597 llvm::FunctionCallee
1598 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1599   assert((IVSize == 32 || IVSize == 64) &&
1600          "IV size is not compatible with the omp runtime");
1601   StringRef Name =
1602       IVSize == 32
1603           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1604           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1605   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1606   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1607                                CGM.Int32Ty,           // tid
1608                                CGM.Int32Ty,           // schedtype
1609                                ITy,                   // lower
1610                                ITy,                   // upper
1611                                ITy,                   // stride
1612                                ITy                    // chunk
1613   };
1614   auto *FnTy =
1615       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1616   return CGM.CreateRuntimeFunction(FnTy, Name);
1617 }
1618 
1619 llvm::FunctionCallee
1620 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1621   assert((IVSize == 32 || IVSize == 64) &&
1622          "IV size is not compatible with the omp runtime");
1623   StringRef Name =
1624       IVSize == 32
1625           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1626           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1627   llvm::Type *TypeParams[] = {
1628       getIdentTyPointerTy(), // loc
1629       CGM.Int32Ty,           // tid
1630   };
1631   auto *FnTy =
1632       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1633   return CGM.CreateRuntimeFunction(FnTy, Name);
1634 }
1635 
1636 llvm::FunctionCallee
1637 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1638   assert((IVSize == 32 || IVSize == 64) &&
1639          "IV size is not compatible with the omp runtime");
1640   StringRef Name =
1641       IVSize == 32
1642           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1643           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1644   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1645   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1646   llvm::Type *TypeParams[] = {
1647     getIdentTyPointerTy(),                     // loc
1648     CGM.Int32Ty,                               // tid
1649     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1650     PtrTy,                                     // p_lower
1651     PtrTy,                                     // p_upper
1652     PtrTy                                      // p_stride
1653   };
1654   auto *FnTy =
1655       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1656   return CGM.CreateRuntimeFunction(FnTy, Name);
1657 }
1658 
1659 /// Obtain information that uniquely identifies a target entry. This
1660 /// consists of the file and device IDs as well as line number associated with
1661 /// the relevant entry source location.
1662 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1663                                      unsigned &DeviceID, unsigned &FileID,
1664                                      unsigned &LineNum) {
1665   SourceManager &SM = C.getSourceManager();
1666 
1667   // The loc should be always valid and have a file ID (the user cannot use
1668   // #pragma directives in macros)
1669 
1670   assert(Loc.isValid() && "Source location is expected to be always valid.");
1671 
1672   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1673   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1674 
1675   llvm::sys::fs::UniqueID ID;
1676   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1677     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1678     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1679     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1680       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1681           << PLoc.getFilename() << EC.message();
1682   }
1683 
1684   DeviceID = ID.getDevice();
1685   FileID = ID.getFile();
1686   LineNum = PLoc.getLine();
1687 }
1688 
/// Return the address of the "_decl_tgt_ref_ptr" indirection pointer for a
/// declare-target variable that must be accessed through the runtime (link
/// clause, or to-clause under unified shared memory). Returns an invalid
/// Address when no indirection is needed.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    // Build the pointer's name: "<mangled name>[_<fileid>]_decl_tgt_ref_ptr".
    // The file id is mixed in for internal-linkage variables, whose mangled
    // names are not unique across translation units.
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      // First use: create the pointer global and register the variable with
      // the offloading machinery.
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host the pointer is statically initialized to the variable's
      // address; on the device the runtime fills it in.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1727 
1728 llvm::Constant *
1729 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1730   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1731          !CGM.getContext().getTargetInfo().isTLSSupported());
1732   // Lookup the entry, lazily creating it if necessary.
1733   std::string Suffix = getName({"cache", ""});
1734   return getOrCreateInternalVariable(
1735       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1736 }
1737 
/// Return the address of the current thread's copy of threadprivate variable
/// \p VD. With native TLS the original address is already per-thread;
/// otherwise emit a __kmpc_threadprivate_cached call.
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  // Args: loc, gtid, &var (as i8*), sizeof(var), &cache.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.Int8PtrTy),
                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                         getOrCreateThreadPrivateCache(VD)};
  // The runtime returns the per-thread copy; it keeps the original alignment.
  return Address(CGF.EmitRuntimeCall(
                     OMPBuilder.getOrCreateRuntimeFunction(
                         CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
                     Args),
                 VDAddr.getAlignment());
}
1758 
/// Emit the runtime calls that register constructor/copy-constructor/
/// destructor functions for a threadprivate variable at \p VDAddr.
void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}
1778 
/// Emit, for a threadprivate variable definition, the ctor/dtor helper
/// functions and their registration with the runtime. Returns the generated
/// initialization function when \p CGF is null (registration happens via a
/// global initializer), or nullptr when registration was emitted inline into
/// \p CGF or nothing was needed (TLS mode, already emitted, no ctor/dtor).
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Native TLS needs no runtime registration.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  // Only emit once per variable definition.
  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // The ctor helper takes the destination copy as a void* argument.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      // Run the variable's initializer into the per-thread copy.
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The helper returns the (unchanged) destination pointer.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // The dtor helper also takes the copy to destroy as a void* argument.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Missing ctor/dtor are passed as typed null pointers.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No active function: wrap the registration in a dedicated global
      // initializer and return it to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1898 
/// Emit and register ctor/dtor offload entries for a declare-target variable
/// definition. On the device, real helper functions are generated; on the
/// host, placeholder globals stand in so host and device entry tables match.
/// Returns OpenMPIsDevice, i.e. true when the caller should skip emitting the
/// host definition.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do without offloading targets on the host.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Link-clause variables (and to-clause under unified shared memory) are
  // accessed through a reference pointer instead; no ctor/dtor entries.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Only emit once per variable definition.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive even though nothing references it directly.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: a private placeholder global serves as the entry's
      // address/ID so the host and device entry tables line up.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the dtor alive even though nothing references it directly.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host-side placeholder, mirroring the ctor case above.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2013 
/// Return a per-thread copy of a compiler-generated ("artificial")
/// threadprivate variable named \p Name of type \p VarType. Uses native TLS
/// when available, otherwise a __kmpc_threadprivate_cached call.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    // TLS path: mark the global thread-local and return it directly.
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Runtime path: loc, gtid, &var, sizeof(var), &cache.
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  // Cast the returned i8* back to a pointer to the variable's type.
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}
2044 
2045 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2046                                    const RegionCodeGenTy &ThenGen,
2047                                    const RegionCodeGenTy &ElseGen) {
2048   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2049 
2050   // If the condition constant folds and can be elided, try to avoid emitting
2051   // the condition and the dead arm of the if/else.
2052   bool CondConstant;
2053   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2054     if (CondConstant)
2055       ThenGen(CGF);
2056     else
2057       ElseGen(CGF);
2058     return;
2059   }
2060 
2061   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2062   // emit the conditional branch.
2063   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2064   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2065   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2066   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2067 
2068   // Emit the 'then' code.
2069   CGF.EmitBlock(ThenBlock);
2070   ThenGen(CGF);
2071   CGF.EmitBranch(ContBlock);
2072   // Emit the 'else' code if present.
2073   // There is no need to emit line number for unconditional branch.
2074   (void)ApplyDebugLocation::CreateEmpty(CGF);
2075   CGF.EmitBlock(ElseBlock);
2076   ElseGen(CGF);
2077   // There is no need to emit line number for unconditional branch.
2078   (void)ApplyDebugLocation::CreateEmpty(CGF);
2079   CGF.EmitBranch(ContBlock);
2080   // Emit the continuation block for code after the if.
2081   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2082 }
2083 
/// Emit code launching \p OutlinedFn with \p CapturedVars for a 'parallel'
/// region: through __kmpc_fork_call when the region is forked, or serialized
/// on the encountering thread between __kmpc_serialized_parallel /
/// __kmpc_end_serialized_parallel when \p IfCond evaluates to false.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // Code emitted when the parallel region is actually forked.
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    // The captured variables are passed as trailing varargs.
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Code emitted when the 'if' clause condition is false and the region is
  // executed serially by the encountering thread.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    //       handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    // Emit both arms guarded by the 'if' clause condition.
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    // No 'if' clause: unconditionally fork.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2154 
2155 // If we're inside an (outlined) parallel region, use the region info's
2156 // thread-ID variable (it is passed in a first argument of the outlined function
2157 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2158 // regular serial code region, get thread ID by calling kmp_int32
2159 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2160 // return the address of that temp.
2161 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2162                                              SourceLocation Loc) {
2163   if (auto *OMPRegionInfo =
2164           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2165     if (OMPRegionInfo->getThreadIDVariable())
2166       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2167 
2168   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2169   QualType Int32Ty =
2170       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2171   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2172   CGF.EmitStoreOfScalar(ThreadID,
2173                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2174 
2175   return ThreadIDTemp;
2176 }
2177 
2178 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2179     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2180   SmallString<256> Buffer;
2181   llvm::raw_svector_ostream Out(Buffer);
2182   Out << Name;
2183   StringRef RuntimeName = Out.str();
2184   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2185   if (Elem.second) {
2186     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2187            "OMP internal variable has different type than requested");
2188     return &*Elem.second;
2189   }
2190 
2191   return Elem.second = new llvm::GlobalVariable(
2192              CGM.getModule(), Ty, /*IsConstant*/ false,
2193              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2194              Elem.first(), /*InsertBefore=*/nullptr,
2195              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2196 }
2197 
2198 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2199   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2200   std::string Name = getName({Prefix, "var"});
2201   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2202 }
2203 
namespace {
/// Common pre(post)-action for different OpenMP constructs.
/// Emits a runtime "enter" call before the construct body and an "exit" call
/// after it; optionally guards the body on the enter call's result.
class CommonActionTy final : public PrePostActionTy {
  // Runtime function invoked on entry to the construct.
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  // Runtime function invoked on exit from the construct.
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  // When true, the body is only executed if the enter call returns non-zero
  // (used by constructs such as 'master'/'masked'/'single' below).
  bool Conditional;
  // Continuation block of the guard; set by Enter() only in the conditional
  // case and consumed by Done().
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      // Guard the body: only run it when the enter call returned non-zero.
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  // Close the guarded region opened by Enter(). Callers invoke this only when
  // the action was constructed with Conditional=true (ContBlock is null
  // otherwise).
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
2242 
2243 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2244                                          StringRef CriticalName,
2245                                          const RegionCodeGenTy &CriticalOpGen,
2246                                          SourceLocation Loc, const Expr *Hint) {
2247   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2248   // CriticalOpGen();
2249   // __kmpc_end_critical(ident_t *, gtid, Lock);
2250   // Prepare arguments and build a call to __kmpc_critical
2251   if (!CGF.HaveInsertPoint())
2252     return;
2253   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2254                          getCriticalRegionLock(CriticalName)};
2255   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2256                                                 std::end(Args));
2257   if (Hint) {
2258     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2259         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2260   }
2261   CommonActionTy Action(
2262       OMPBuilder.getOrCreateRuntimeFunction(
2263           CGM.getModule(),
2264           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2265       EnterArgs,
2266       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2267                                             OMPRTL___kmpc_end_critical),
2268       Args);
2269   CriticalOpGen.setAction(Action);
2270   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2271 }
2272 
2273 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2274                                        const RegionCodeGenTy &MasterOpGen,
2275                                        SourceLocation Loc) {
2276   if (!CGF.HaveInsertPoint())
2277     return;
2278   // if(__kmpc_master(ident_t *, gtid)) {
2279   //   MasterOpGen();
2280   //   __kmpc_end_master(ident_t *, gtid);
2281   // }
2282   // Prepare arguments and build a call to __kmpc_master
2283   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2284   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2285                             CGM.getModule(), OMPRTL___kmpc_master),
2286                         Args,
2287                         OMPBuilder.getOrCreateRuntimeFunction(
2288                             CGM.getModule(), OMPRTL___kmpc_end_master),
2289                         Args,
2290                         /*Conditional=*/true);
2291   MasterOpGen.setAction(Action);
2292   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2293   Action.Done(CGF);
2294 }
2295 
2296 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2297                                        const RegionCodeGenTy &MaskedOpGen,
2298                                        SourceLocation Loc, const Expr *Filter) {
2299   if (!CGF.HaveInsertPoint())
2300     return;
2301   // if(__kmpc_masked(ident_t *, gtid, filter)) {
2302   //   MaskedOpGen();
2303   //   __kmpc_end_masked(iden_t *, gtid);
2304   // }
2305   // Prepare arguments and build a call to __kmpc_masked
2306   llvm::Value *FilterVal = Filter
2307                                ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2308                                : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2309   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2310                          FilterVal};
2311   llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2312                             getThreadID(CGF, Loc)};
2313   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2314                             CGM.getModule(), OMPRTL___kmpc_masked),
2315                         Args,
2316                         OMPBuilder.getOrCreateRuntimeFunction(
2317                             CGM.getModule(), OMPRTL___kmpc_end_masked),
2318                         ArgsEnd,
2319                         /*Conditional=*/true);
2320   MaskedOpGen.setAction(Action);
2321   emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2322   Action.Done(CGF);
2323 }
2324 
2325 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2326                                         SourceLocation Loc) {
2327   if (!CGF.HaveInsertPoint())
2328     return;
2329   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2330     OMPBuilder.createTaskyield(CGF.Builder);
2331   } else {
2332     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2333     llvm::Value *Args[] = {
2334         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2335         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2336     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2337                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2338                         Args);
2339   }
2340 
2341   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2342     Region->emitUntiedSwitch(CGF);
2343 }
2344 
2345 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2346                                           const RegionCodeGenTy &TaskgroupOpGen,
2347                                           SourceLocation Loc) {
2348   if (!CGF.HaveInsertPoint())
2349     return;
2350   // __kmpc_taskgroup(ident_t *, gtid);
2351   // TaskgroupOpGen();
2352   // __kmpc_end_taskgroup(ident_t *, gtid);
2353   // Prepare arguments and build a call to __kmpc_taskgroup
2354   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2355   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2356                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2357                         Args,
2358                         OMPBuilder.getOrCreateRuntimeFunction(
2359                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2360                         Args);
2361   TaskgroupOpGen.setAction(Action);
2362   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2363 }
2364 
2365 /// Given an array of pointers to variables, project the address of a
2366 /// given variable.
2367 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2368                                       unsigned Index, const VarDecl *Var) {
2369   // Pull out the pointer to the variable.
2370   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2371   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2372 
2373   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2374   Addr = CGF.Builder.CreateElementBitCast(
2375       Addr, CGF.ConvertTypeForMem(Var->getType()));
2376   return Addr;
2377 }
2378 
/// Emit the helper function passed to __kmpc_copyprivate: it takes two void*
/// arrays (destination and source pointer lists) and copies each copyprivate
/// variable from source to destination using its assignment operator.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  // The helper is internal to this translation unit.
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Reinterpret the raw void* arguments as pointers to void* arrays:
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // Copy each variable with its associated assignment operation:
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2432 
/// Emit a 'single' region guarded by __kmpc_single/__kmpc_end_single. When
/// copyprivate variables are present, a did_it flag records which thread ran
/// the region and __kmpc_copyprivate broadcasts its values to the other
/// threads.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // The four copyprivate-related arrays are parallel: one entry per variable.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Conditional: only the thread that wins __kmpc_single runs the body.
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    // (Emitted inside the guarded region, so only the executing thread sets
    // the flag.)
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    // Store the address of each copyprivate variable into the list.
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2520 
2521 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2522                                         const RegionCodeGenTy &OrderedOpGen,
2523                                         SourceLocation Loc, bool IsThreads) {
2524   if (!CGF.HaveInsertPoint())
2525     return;
2526   // __kmpc_ordered(ident_t *, gtid);
2527   // OrderedOpGen();
2528   // __kmpc_end_ordered(ident_t *, gtid);
2529   // Prepare arguments and build a call to __kmpc_ordered
2530   if (IsThreads) {
2531     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2532     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2533                               CGM.getModule(), OMPRTL___kmpc_ordered),
2534                           Args,
2535                           OMPBuilder.getOrCreateRuntimeFunction(
2536                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2537                           Args);
2538     OrderedOpGen.setAction(Action);
2539     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2540     return;
2541   }
2542   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2543 }
2544 
2545 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2546   unsigned Flags;
2547   if (Kind == OMPD_for)
2548     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2549   else if (Kind == OMPD_sections)
2550     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2551   else if (Kind == OMPD_single)
2552     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2553   else if (Kind == OMPD_barrier)
2554     Flags = OMP_IDENT_BARRIER_EXPL;
2555   else
2556     Flags = OMP_IDENT_BARRIER_IMPL;
2557   return Flags;
2558 }
2559 
2560 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2561     CodeGenFunction &CGF, const OMPLoopDirective &S,
2562     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2563   // Check if the loop directive is actually a doacross loop directive. In this
2564   // case choose static, 1 schedule.
2565   if (llvm::any_of(
2566           S.getClausesOfKind<OMPOrderedClause>(),
2567           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2568     ScheduleKind = OMPC_SCHEDULE_static;
2569     // Chunk size is 1 in this case.
2570     llvm::APInt ChunkSize(32, 1);
2571     ChunkExpr = IntegerLiteral::Create(
2572         CGF.getContext(), ChunkSize,
2573         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2574         SourceLocation());
2575   }
2576 }
2577 
/// Emit an OpenMP barrier. Inside a cancellable region (and unless
/// \p ForceSimpleCall) the cancellation-aware __kmpc_cancel_barrier is used;
/// with \p EmitChecks its result is tested and a non-zero value exits the
/// construct. Otherwise a plain __kmpc_barrier is emitted.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  // The flags encode which directive implied this barrier (see
  // getDefaultFlagsForBarriers).
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  // Default: a plain, non-cancellable barrier.
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2627 
2628 /// Map the OpenMP loop schedule to the runtime enumeration.
2629 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2630                                           bool Chunked, bool Ordered) {
2631   switch (ScheduleKind) {
2632   case OMPC_SCHEDULE_static:
2633     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2634                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2635   case OMPC_SCHEDULE_dynamic:
2636     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2637   case OMPC_SCHEDULE_guided:
2638     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2639   case OMPC_SCHEDULE_runtime:
2640     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2641   case OMPC_SCHEDULE_auto:
2642     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2643   case OMPC_SCHEDULE_unknown:
2644     assert(!Chunked && "chunk was specified but schedule kind not known");
2645     return Ordered ? OMP_ord_static : OMP_sch_static;
2646   }
2647   llvm_unreachable("Unexpected runtime schedule");
2648 }
2649 
2650 /// Map the OpenMP distribute schedule to the runtime enumeration.
2651 static OpenMPSchedType
2652 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2653   // only static is allowed for dist_schedule
2654   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2655 }
2656 
2657 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2658                                          bool Chunked) const {
2659   OpenMPSchedType Schedule =
2660       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2661   return Schedule == OMP_sch_static;
2662 }
2663 
2664 bool CGOpenMPRuntime::isStaticNonchunked(
2665     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2666   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2667   return Schedule == OMP_dist_sch_static;
2668 }
2669 
2670 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2671                                       bool Chunked) const {
2672   OpenMPSchedType Schedule =
2673       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2674   return Schedule == OMP_sch_static_chunked;
2675 }
2676 
2677 bool CGOpenMPRuntime::isStaticChunked(
2678     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2679   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2680   return Schedule == OMP_dist_sch_static_chunked;
2681 }
2682 
2683 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2684   OpenMPSchedType Schedule =
2685       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2686   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2687   return Schedule != OMP_sch_static;
2688 }
2689 
2690 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2691                                   OpenMPScheduleClauseModifier M1,
2692                                   OpenMPScheduleClauseModifier M2) {
2693   int Modifier = 0;
2694   switch (M1) {
2695   case OMPC_SCHEDULE_MODIFIER_monotonic:
2696     Modifier = OMP_sch_modifier_monotonic;
2697     break;
2698   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2699     Modifier = OMP_sch_modifier_nonmonotonic;
2700     break;
2701   case OMPC_SCHEDULE_MODIFIER_simd:
2702     if (Schedule == OMP_sch_static_chunked)
2703       Schedule = OMP_sch_static_balanced_chunked;
2704     break;
2705   case OMPC_SCHEDULE_MODIFIER_last:
2706   case OMPC_SCHEDULE_MODIFIER_unknown:
2707     break;
2708   }
2709   switch (M2) {
2710   case OMPC_SCHEDULE_MODIFIER_monotonic:
2711     Modifier = OMP_sch_modifier_monotonic;
2712     break;
2713   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2714     Modifier = OMP_sch_modifier_nonmonotonic;
2715     break;
2716   case OMPC_SCHEDULE_MODIFIER_simd:
2717     if (Schedule == OMP_sch_static_chunked)
2718       Schedule = OMP_sch_static_balanced_chunked;
2719     break;
2720   case OMPC_SCHEDULE_MODIFIER_last:
2721   case OMPC_SCHEDULE_MODIFIER_unknown:
2722     break;
2723   }
2724   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2725   // If the static schedule kind is specified or if the ordered clause is
2726   // specified, and if the nonmonotonic modifier is not specified, the effect is
2727   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2728   // modifier is specified, the effect is as if the nonmonotonic modifier is
2729   // specified.
2730   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2731     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2732           Schedule == OMP_sch_static_balanced_chunked ||
2733           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2734           Schedule == OMP_dist_sch_static_chunked ||
2735           Schedule == OMP_dist_sch_static))
2736       Modifier = OMP_sch_modifier_nonmonotonic;
2737   }
2738   return Schedule | Modifier;
2739 }
2740 
2741 void CGOpenMPRuntime::emitForDispatchInit(
2742     CodeGenFunction &CGF, SourceLocation Loc,
2743     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2744     bool Ordered, const DispatchRTInput &DispatchValues) {
2745   if (!CGF.HaveInsertPoint())
2746     return;
2747   OpenMPSchedType Schedule = getRuntimeSchedule(
2748       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2749   assert(Ordered ||
2750          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2751           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2752           Schedule != OMP_sch_static_balanced_chunked));
2753   // Call __kmpc_dispatch_init(
2754   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2755   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2756   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2757 
2758   // If the Chunk was not specified in the clause - use default value 1.
2759   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2760                                             : CGF.Builder.getIntN(IVSize, 1);
2761   llvm::Value *Args[] = {
2762       emitUpdateLocation(CGF, Loc),
2763       getThreadID(CGF, Loc),
2764       CGF.Builder.getInt32(addMonoNonMonoModifier(
2765           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2766       DispatchValues.LB,                                     // Lower
2767       DispatchValues.UB,                                     // Upper
2768       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2769       Chunk                                                  // Chunk
2770   };
2771   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2772 }
2773 
/// Emits the actual call to the static-init runtime entry point selected by
/// the caller. Shared helper for worksharing-loop, sections and distribute
/// static initialization.
///
/// \param UpdateLocation ident_t* describing the source location and flags.
/// \param ThreadId Global thread id of the encountering thread.
/// \param ForStaticInitFunction Runtime entry point to invoke.
/// \param Schedule Runtime schedule kind; must be one of the static variants
///        (asserted below).
/// \param M1, M2 Schedule-clause modifiers folded into the schedule encoding.
/// \param Values Addresses of the lastiter/lower/upper/stride variables plus
///        chunk value and induction-variable properties.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  // Ordered static loops take the dispatch-init path, never this one.
  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    // A missing chunk expression is only valid for the non-chunked schedules.
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
2822 
2823 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2824                                         SourceLocation Loc,
2825                                         OpenMPDirectiveKind DKind,
2826                                         const OpenMPScheduleTy &ScheduleKind,
2827                                         const StaticRTInput &Values) {
2828   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2829       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2830   assert(isOpenMPWorksharingDirective(DKind) &&
2831          "Expected loop-based or sections-based directive.");
2832   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2833                                              isOpenMPLoopDirective(DKind)
2834                                                  ? OMP_IDENT_WORK_LOOP
2835                                                  : OMP_IDENT_WORK_SECTIONS);
2836   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2837   llvm::FunctionCallee StaticInitFunction =
2838       createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
2839   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2840   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2841                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2842 }
2843 
2844 void CGOpenMPRuntime::emitDistributeStaticInit(
2845     CodeGenFunction &CGF, SourceLocation Loc,
2846     OpenMPDistScheduleClauseKind SchedKind,
2847     const CGOpenMPRuntime::StaticRTInput &Values) {
2848   OpenMPSchedType ScheduleNum =
2849       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2850   llvm::Value *UpdatedLocation =
2851       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2852   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2853   llvm::FunctionCallee StaticInitFunction;
2854   bool isGPUDistribute =
2855       CGM.getLangOpts().OpenMPIsDevice &&
2856       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2857   StaticInitFunction = createForStaticInitFunction(
2858       Values.IVSize, Values.IVSigned, isGPUDistribute);
2859 
2860   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2861                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2862                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2863 }
2864 
2865 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2866                                           SourceLocation Loc,
2867                                           OpenMPDirectiveKind DKind) {
2868   if (!CGF.HaveInsertPoint())
2869     return;
2870   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2871   llvm::Value *Args[] = {
2872       emitUpdateLocation(CGF, Loc,
2873                          isOpenMPDistributeDirective(DKind)
2874                              ? OMP_IDENT_WORK_DISTRIBUTE
2875                              : isOpenMPLoopDirective(DKind)
2876                                    ? OMP_IDENT_WORK_LOOP
2877                                    : OMP_IDENT_WORK_SECTIONS),
2878       getThreadID(CGF, Loc)};
2879   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2880   if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2881       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2882     CGF.EmitRuntimeCall(
2883         OMPBuilder.getOrCreateRuntimeFunction(
2884             CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2885         Args);
2886   else
2887     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2888                             CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2889                         Args);
2890 }
2891 
/// Notifies the runtime that one iteration of an ordered dynamically
/// scheduled loop has finished.
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call the dispatch finalization entry point (ident_t *loc, kmp_int32 tid);
  // the exact runtime function is selected by IVSize/IVSigned via
  // createDispatchFiniFunction.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}
2902 
/// Emits a call to __kmpc_dispatch_next and converts its kmp_int32 result
/// into a boolean "more chunks available" value.
///
/// \param IL Address of the is-last-iteration flag (written by the runtime).
/// \param LB, UB, ST Addresses of the lower bound, upper bound and stride
///        variables, updated by the runtime for the next chunk.
/// \returns A boolean value that is true while work remains.
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  // The runtime returns a kmp_int32; narrow it to bool for the loop guard.
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}
2926 
2927 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2928                                            llvm::Value *NumThreads,
2929                                            SourceLocation Loc) {
2930   if (!CGF.HaveInsertPoint())
2931     return;
2932   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2933   llvm::Value *Args[] = {
2934       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2935       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2936   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2937                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2938                       Args);
2939 }
2940 
2941 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2942                                          ProcBindKind ProcBind,
2943                                          SourceLocation Loc) {
2944   if (!CGF.HaveInsertPoint())
2945     return;
2946   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2947   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2948   llvm::Value *Args[] = {
2949       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2950       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2951   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2952                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2953                       Args);
2954 }
2955 
2956 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2957                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2958   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2959     OMPBuilder.createFlush(CGF.Builder);
2960   } else {
2961     if (!CGF.HaveInsertPoint())
2962       return;
2963     // Build call void __kmpc_flush(ident_t *loc)
2964     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2965                             CGM.getModule(), OMPRTL___kmpc_flush),
2966                         emitUpdateLocation(CGF, Loc));
2967   }
2968 }
2969 
namespace {
/// Indexes of fields for type kmp_task_t. The order must match the layout of
/// the task descriptor built elsewhere in this file.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2995 
2996 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2997   return OffloadEntriesTargetRegion.empty() &&
2998          OffloadEntriesDeviceGlobalVar.empty();
2999 }
3000 
/// Initialize target region entry.
/// Creates a placeholder entry (no address/ID yet) at the given
/// (device, file, parent function, line) key; used on the device side where
/// the set of entries is loaded from the host IR before code generation.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}
3014 
/// Registers a target region entry with its address, ID and flags.
/// On the device side the entry must already exist (initialized from host
/// metadata) and is only filled in; on the host side a brand-new entry is
/// created.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
      return;
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    // Plain target-region entries that were already registered (address/ID
    // set) are silently skipped; only unexpected duplicates assert below.
    if (Flags ==
            OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
        hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
                                 /*IgnoreAddressId*/ true))
      return;
    assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
           "Target region entry already registered!");
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}
3044 
3045 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3046     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3047     bool IgnoreAddressId) const {
3048   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3049   if (PerDevice == OffloadEntriesTargetRegion.end())
3050     return false;
3051   auto PerFile = PerDevice->second.find(FileID);
3052   if (PerFile == PerDevice->second.end())
3053     return false;
3054   auto PerParentName = PerFile->second.find(ParentName);
3055   if (PerParentName == PerFile->second.end())
3056     return false;
3057   auto PerLine = PerParentName->second.find(LineNum);
3058   if (PerLine == PerParentName->second.end())
3059     return false;
3060   // Fail if this entry is already registered.
3061   if (!IgnoreAddressId &&
3062       (PerLine->second.getAddress() || PerLine->second.getID()))
3063     return false;
3064   return true;
3065 }
3066 
3067 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3068     const OffloadTargetRegionEntryInfoActTy &Action) {
3069   // Scan all target region entries and perform the provided action.
3070   for (const auto &D : OffloadEntriesTargetRegion)
3071     for (const auto &F : D.second)
3072       for (const auto &P : F.second)
3073         for (const auto &L : P.second)
3074           Action(D.first, F.first, P.first(), L.first, L.second);
3075 }
3076 
/// Creates a placeholder entry (no address/size yet) for a declare-target
/// global variable; used on the device side where the set of entries is
/// loaded from the host IR before code generation.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}
3087 
/// Registers a declare-target global variable entry with its address, size,
/// flags and linkage. On the device side the entry must already exist and is
/// filled in; on the host side an existing entry is completed or a new one is
/// created.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasDeviceGlobalVarEntryInfo(VarName))
      return;
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    // NOTE(review): the hasDeviceGlobalVarEntryInfo() re-check below is
    // redundant — it already passed above; only Entry.getAddress() decides.
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      // Entry already has an address: only fill in a missing size/linkage.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      // Only fill in a missing size/linkage on an already-registered entry.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}
3124 
3125 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3126     actOnDeviceGlobalVarEntriesInfo(
3127         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3128   // Scan all target region entries and perform the provided action.
3129   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3130     Action(E.getKey(), E.getValue());
3131 }
3132 
/// Emits one __tgt_offload_entry descriptor into the module: an internal
/// global holding the entry name plus a weak struct global (in the
/// "omp_offloading_entries" section) pointing at the entry's ID, name, size
/// and flags, so the linker/runtime can collect all entries.
void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // Struct fields: addr, name, size, flags, reserved (always 0 here).
  llvm::Constant *Data[] = {
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
      llvm::ConstantInt::get(CGM.SizeTy, Size),
      llvm::ConstantInt::get(CGM.Int32Ty, Flags),
      llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection("omp_offloading_entries");
}
3163 
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for function that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries are placed at the index given by their creation order so the
  // final walk below visits them deterministically.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry.
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Recover a source location for diagnostics by matching the
        // (DeviceID, FileID) pair against the files known to the
        // SourceManager; Loc stays invalid if no file matches.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry.
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Walk the collected entries in creation order and emit the actual
  // __tgt_offload_entry descriptors, diagnosing inconsistent entries.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // Under unified shared memory on the device, 'to' entries need no
        // descriptor.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3337 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a throwaway context; only its named metadata is
  // consulted.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  // Each operand is one entry; operand 0 is the entry kind, the remaining
  // operands are interpreted per kind (see the emitter lambdas above).
  for (llvm::MDNode *MN : MD->operands()) {
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
3406 
3407 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3408   if (!KmpRoutineEntryPtrTy) {
3409     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3410     ASTContext &C = CGM.getContext();
3411     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3412     FunctionProtoType::ExtProtoInfo EPI;
3413     KmpRoutineEntryPtrQTy = C.getPointerType(
3414         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3415     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3416   }
3417 }
3418 
3419 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3420   // Make sure the type of the entry is already created. This is the type we
3421   // have to create:
3422   // struct __tgt_offload_entry{
3423   //   void      *addr;       // Pointer to the offload entry info.
3424   //                          // (function or global)
3425   //   char      *name;       // Name of the function or global.
3426   //   size_t     size;       // Size of the entry info (0 if it a function).
3427   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3428   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3429   // };
3430   if (TgtOffloadEntryQTy.isNull()) {
3431     ASTContext &C = CGM.getContext();
3432     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3433     RD->startDefinition();
3434     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3435     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3436     addFieldToRecordDecl(C, RD, C.getSizeType());
3437     addFieldToRecordDecl(
3438         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3439     addFieldToRecordDecl(
3440         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3441     RD->completeDefinition();
3442     RD->addAttr(PackedAttr::CreateImplicit(C));
3443     TgtOffloadEntryQTy = C.getRecordType(RD);
3444   }
3445   return TgtOffloadEntryQTy;
3446 }
3447 
3448 namespace {
3449 struct PrivateHelpersTy {
3450   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3451                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3452       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3453         PrivateElemInit(PrivateElemInit) {}
3454   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3455   const Expr *OriginalRef = nullptr;
3456   const VarDecl *Original = nullptr;
3457   const VarDecl *PrivateCopy = nullptr;
3458   const VarDecl *PrivateElemInit = nullptr;
3459   bool isLocalPrivate() const {
3460     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3461   }
3462 };
3463 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3464 } // anonymous namespace
3465 
3466 static bool isAllocatableDecl(const VarDecl *VD) {
3467   const VarDecl *CVD = VD->getCanonicalDecl();
3468   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3469     return false;
3470   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3471   // Use the default allocation.
3472   return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
3473             AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
3474            !AA->getAllocator());
3475 }
3476 
3477 static RecordDecl *
3478 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3479   if (!Privates.empty()) {
3480     ASTContext &C = CGM.getContext();
3481     // Build struct .kmp_privates_t. {
3482     //         /*  private vars  */
3483     //       };
3484     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3485     RD->startDefinition();
3486     for (const auto &Pair : Privates) {
3487       const VarDecl *VD = Pair.second.Original;
3488       QualType Type = VD->getType().getNonReferenceType();
3489       // If the private variable is a local variable with lvalue ref type,
3490       // allocate the pointer instead of the pointee type.
3491       if (Pair.second.isLocalPrivate()) {
3492         if (VD->getType()->isLValueReferenceType())
3493           Type = C.getPointerType(Type);
3494         if (isAllocatableDecl(VD))
3495           Type = C.getPointerType(Type);
3496       }
3497       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3498       if (VD->hasAttrs()) {
3499         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3500              E(VD->getAttrs().end());
3501              I != E; ++I)
3502           FD->addAttr(*I);
3503       }
3504     }
3505     RD->completeDefinition();
3506     return RD;
3507   }
3508   return nullptr;
3509 }
3510 
3511 static RecordDecl *
3512 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3513                          QualType KmpInt32Ty,
3514                          QualType KmpRoutineEntryPointerQTy) {
3515   ASTContext &C = CGM.getContext();
3516   // Build struct kmp_task_t {
3517   //         void *              shareds;
3518   //         kmp_routine_entry_t routine;
3519   //         kmp_int32           part_id;
3520   //         kmp_cmplrdata_t data1;
3521   //         kmp_cmplrdata_t data2;
3522   // For taskloops additional fields:
3523   //         kmp_uint64          lb;
3524   //         kmp_uint64          ub;
3525   //         kmp_int64           st;
3526   //         kmp_int32           liter;
3527   //         void *              reductions;
3528   //       };
3529   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3530   UD->startDefinition();
3531   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3532   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3533   UD->completeDefinition();
3534   QualType KmpCmplrdataTy = C.getRecordType(UD);
3535   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3536   RD->startDefinition();
3537   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3538   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3539   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3540   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3541   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3542   if (isOpenMPTaskLoopDirective(Kind)) {
3543     QualType KmpUInt64Ty =
3544         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3545     QualType KmpInt64Ty =
3546         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3547     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3548     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3549     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3550     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3551     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3552   }
3553   RD->completeDefinition();
3554   return RD;
3555 }
3556 
3557 static RecordDecl *
3558 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3559                                      ArrayRef<PrivateDataTy> Privates) {
3560   ASTContext &C = CGM.getContext();
3561   // Build struct kmp_task_t_with_privates {
3562   //         kmp_task_t task_data;
3563   //         .kmp_privates_t. privates;
3564   //       };
3565   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3566   RD->startDefinition();
3567   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3568   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3569     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3570   RD->completeDefinition();
3571   return RD;
3572 }
3573 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
/// \param TaskFunction The outlined task body invoked by the proxy.
/// \param TaskPrivatesMap Value passed through as the privates mapping
/// argument (may be a null pointer constant when there are no privates).
/// \return The generated internal-linkage proxy function.
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Proxy signature: kmp_int32 (kmp_int32 gtid, kmp_task_t_with_privates *tt).
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // The first field of kmp_task_t_with_privates is the kmp_task_t task data.
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  // part_id is passed by address, not by value.
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  // Cast the loaded shareds pointer to the expected shareds record type.
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // Pass &tt->privates when a privates field exists, null otherwise.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloops additionally receive lb, ub, st, liter, and reductions.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3688 
/// Emit a function that runs the destructors of the fields of the privates
/// record embedded in a kmp_task_t_with_privates instance. Only fields whose
/// type has a destruction kind get a destroy emitted.
/// Signature matches the task entry: kmp_int32 (kmp_int32 gtid, tt *).
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // The second field of kmp_task_t_with_privates holds the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3737 
3738 /// Emit a privates mapping function for correct handling of private and
3739 /// firstprivate variables.
3740 /// \code
3741 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3742 /// **noalias priv1,...,  <tyn> **noalias privn) {
3743 ///   *priv1 = &.privates.priv1;
3744 ///   ...;
3745 ///   *privn = &.privates.privn;
3746 /// }
3747 /// \endcode
3748 static llvm::Value *
3749 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3750                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3751                                ArrayRef<PrivateDataTy> Privates) {
3752   ASTContext &C = CGM.getContext();
3753   FunctionArgList Args;
3754   ImplicitParamDecl TaskPrivatesArg(
3755       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3756       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3757       ImplicitParamDecl::Other);
3758   Args.push_back(&TaskPrivatesArg);
3759   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3760   unsigned Counter = 1;
3761   for (const Expr *E : Data.PrivateVars) {
3762     Args.push_back(ImplicitParamDecl::Create(
3763         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3764         C.getPointerType(C.getPointerType(E->getType()))
3765             .withConst()
3766             .withRestrict(),
3767         ImplicitParamDecl::Other));
3768     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3769     PrivateVarsPos[VD] = Counter;
3770     ++Counter;
3771   }
3772   for (const Expr *E : Data.FirstprivateVars) {
3773     Args.push_back(ImplicitParamDecl::Create(
3774         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3775         C.getPointerType(C.getPointerType(E->getType()))
3776             .withConst()
3777             .withRestrict(),
3778         ImplicitParamDecl::Other));
3779     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3780     PrivateVarsPos[VD] = Counter;
3781     ++Counter;
3782   }
3783   for (const Expr *E : Data.LastprivateVars) {
3784     Args.push_back(ImplicitParamDecl::Create(
3785         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3786         C.getPointerType(C.getPointerType(E->getType()))
3787             .withConst()
3788             .withRestrict(),
3789         ImplicitParamDecl::Other));
3790     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3791     PrivateVarsPos[VD] = Counter;
3792     ++Counter;
3793   }
3794   for (const VarDecl *VD : Data.PrivateLocals) {
3795     QualType Ty = VD->getType().getNonReferenceType();
3796     if (VD->getType()->isLValueReferenceType())
3797       Ty = C.getPointerType(Ty);
3798     if (isAllocatableDecl(VD))
3799       Ty = C.getPointerType(Ty);
3800     Args.push_back(ImplicitParamDecl::Create(
3801         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3802         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3803         ImplicitParamDecl::Other));
3804     PrivateVarsPos[VD] = Counter;
3805     ++Counter;
3806   }
3807   const auto &TaskPrivatesMapFnInfo =
3808       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3809   llvm::FunctionType *TaskPrivatesMapTy =
3810       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3811   std::string Name =
3812       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3813   auto *TaskPrivatesMap = llvm::Function::Create(
3814       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3815       &CGM.getModule());
3816   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3817                                     TaskPrivatesMapFnInfo);
3818   if (CGM.getLangOpts().Optimize) {
3819     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3820     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3821     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3822   }
3823   CodeGenFunction CGF(CGM);
3824   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3825                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3826 
3827   // *privi = &.privates.privi;
3828   LValue Base = CGF.EmitLoadOfPointerLValue(
3829       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3830       TaskPrivatesArg.getType()->castAs<PointerType>());
3831   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3832   Counter = 0;
3833   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3834     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3835     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3836     LValue RefLVal =
3837         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3838     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3839         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3840     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3841     ++Counter;
3842   }
3843   CGF.FinishFunction();
3844   return TaskPrivatesMap;
3845 }
3846 
/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Address of the shareds record (may be invalid).
/// \param TDBase LValue of the kmp_task_t_with_privates instance.
/// \param ForDup True when called from the task duplication function (see
/// emitTaskDupFunction); changes which initializers are emitted and where the
/// shared values are read from.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the fields of the privates record in lockstep with Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the dup function, only non-trivial constructor calls are re-emitted;
    // everything else was handled when the original task was created.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the shared value out of the source task's shareds record.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          // Variable captured by a lambda or a block: emit directly.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/aggregate firstprivate: privatize the element init
          // variable to the shared value and run the initializer.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private: run the private copy's own initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3968 
3969 /// Check if duplication function is required for taskloops.
3970 static bool checkInitIsRequired(CodeGenFunction &CGF,
3971                                 ArrayRef<PrivateDataTy> Privates) {
3972   bool InitRequired = false;
3973   for (const PrivateDataTy &Pair : Privates) {
3974     if (Pair.second.isLocalPrivate())
3975       continue;
3976     const VarDecl *VD = Pair.second.PrivateCopy;
3977     const Expr *Init = VD->getAnyInitializer();
3978     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3979                                     !CGF.isTrivialInitializer(Init));
3980     if (InitRequired)
3981       break;
3982   }
3983   return InitRequired;
3984 }
3985 
3986 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
/// \param WithLastIter True when the 'liter' field must be copied from the
/// lastpriv argument (taskloop with lastprivates).
/// \return The generated internal-linkage task_dup function.
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Signature: void (tt *task_dst, tt *task_src, int lastpriv).
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  // Firstprivates are copied out of the source task's shareds record.
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4065 
4066 /// Checks if destructor function is required to be generated.
4067 /// \return true if cleanups are required, false otherwise.
4068 static bool
4069 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4070                          ArrayRef<PrivateDataTy> Privates) {
4071   for (const PrivateDataTy &P : Privates) {
4072     if (P.second.isLocalPrivate())
4073       continue;
4074     QualType Ty = P.second.Original->getType().getNonReferenceType();
4075     if (Ty.isDestructedType())
4076       return true;
4077   }
4078   return false;
4079 }
4080 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// The constructor privatizes the iterator and counter variables and emits
/// the *opening* half of one counting loop per iterator (counter init,
/// continuation block, bounds check, body block, iterator update). The
/// destructor emits the *closing* half (counter increment, back-branch,
/// exit block) in reverse order, so any code emitted while the scope is
/// alive becomes the innermost loop body.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator continuation/exit destinations, filled by the constructor
  // and consumed (in reverse) by the destructor.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    // A null iterator expression makes the scope a no-op.
    if (!E)
      return;
    // Evaluate all upper bounds up front, before any loop is opened.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Use a signed or unsigned compare according to the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Close the loops from innermost to outermost.
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4159 
4160 static std::pair<llvm::Value *, llvm::Value *>
4161 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4162   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4163   llvm::Value *Addr;
4164   if (OASE) {
4165     const Expr *Base = OASE->getBase();
4166     Addr = CGF.EmitScalarExpr(Base);
4167   } else {
4168     Addr = CGF.EmitLValue(E).getPointer(CGF);
4169   }
4170   llvm::Value *SizeVal;
4171   QualType Ty = E->getType();
4172   if (OASE) {
4173     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4174     for (const Expr *SE : OASE->getDimensions()) {
4175       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4176       Sz = CGF.EmitScalarConversion(
4177           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4178       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4179     }
4180   } else if (const auto *ASE =
4181                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4182     LValue UpAddrLVal =
4183         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4184     Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
4185     llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
4186         UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
4187     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4188     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4189     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4190   } else {
4191     SizeVal = CGF.getTypeSize(Ty);
4192   }
4193   return std::make_pair(Addr, SizeVal);
4194 }
4195 
4196 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4197 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4198   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4199   if (KmpTaskAffinityInfoTy.isNull()) {
4200     RecordDecl *KmpAffinityInfoRD =
4201         C.buildImplicitRecord("kmp_task_affinity_info_t");
4202     KmpAffinityInfoRD->startDefinition();
4203     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4204     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4205     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4206     KmpAffinityInfoRD->completeDefinition();
4207     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4208   }
4209 }
4210 
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally record the element-init variable used to copy
  // the original value into the private copy.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Sort by decreasing alignment; stable so declaration order is preserved
  // among equally-aligned privates.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use a
  // different (cached) record than task/target directives.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map parameter is the 4th argument of the outlined task
  // function; take its LLVM type to cast/match the map function pointer.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The 'final' clause may be a runtime expression (pointer set) or a
  // compile-time constant (int part of the pointer-int pair).
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // For nowait directives allocate via __kmpc_omp_target_task_alloc, which
    // takes an additional device ID argument.
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // Clauses with iterator modifiers contribute a runtime-computed count
    // (NumOfElements); plain clauses contribute a compile-time count.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized case: emit a VLA of affinity records.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Constant-sized case: a plain stack array is enough.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      // Iterator-modified clauses need a runtime position counter, seeded
      // past the statically-filled elements.
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getElementType(),
                                          AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops that need per-task re-initialization also get a task-dup
    // helper, used by the runtime when it clones the task descriptor.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4601 
namespace {
/// Dependence kind for RTL.
/// NOTE: the numeric values must stay in sync with the flag encoding the
/// OpenMP runtime uses for kmp_depend_info (see openmp/runtime/src/kmp.h).
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4612 
4613 /// Translates internal dependency kind into the runtime kind.
4614 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4615   RTLDependenceKindTy DepKind;
4616   switch (K) {
4617   case OMPC_DEPEND_in:
4618     DepKind = DepIn;
4619     break;
4620   // Out and InOut dependencies must use the same code.
4621   case OMPC_DEPEND_out:
4622   case OMPC_DEPEND_inout:
4623     DepKind = DepInOut;
4624     break;
4625   case OMPC_DEPEND_mutexinoutset:
4626     DepKind = DepMutexInOutSet;
4627     break;
4628   case OMPC_DEPEND_source:
4629   case OMPC_DEPEND_sink:
4630   case OMPC_DEPEND_depobj:
4631   case OMPC_DEPEND_unknown:
4632     llvm_unreachable("Unknown task dependence type");
4633   }
4634   return DepKind;
4635 }
4636 
4637 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4638 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4639                            QualType &FlagsTy) {
4640   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4641   if (KmpDependInfoTy.isNull()) {
4642     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4643     KmpDependInfoRD->startDefinition();
4644     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4645     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4646     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4647     KmpDependInfoRD->completeDefinition();
4648     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4649   }
4650 }
4651 
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // The depobj variable holds a void* pointing at the dependence array.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // The element at index -1 (just before the dependence list) carries
  // bookkeeping data; its base_addr field stores the number of dependencies.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  // Returns the dependence count and an lvalue for the first element.
  return std::make_pair(NumDeps, Base);
}
4680 
/// Emits one kmp_depend_info record per dependency expression in \p Data
/// into \p DependenciesArray, starting at position \p Pos.
/// \param Pos Either a compile-time index (unsigned *) when the number of
/// records is statically known, or a runtime index (LValue *) when it is not
/// (e.g. with the iterator modifier). The index is advanced by one per
/// emitted record.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // If the clause has an iterator modifier, generate the iterator loops so
  // that the per-expression code below is emitted inside them.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Statically known position: address the slot with a constant GEP.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Runtime position: load the current index and address the slot
      // dynamically.
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
                                        DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the output position by one record.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4740 
4741 static SmallVector<llvm::Value *, 4>
4742 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4743                         const OMPTaskDataTy::DependData &Data) {
4744   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4745          "Expected depobj dependecy kind.");
4746   SmallVector<llvm::Value *, 4> Sizes;
4747   SmallVector<LValue, 4> SizeLVals;
4748   ASTContext &C = CGF.getContext();
4749   QualType FlagsTy;
4750   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4751   RecordDecl *KmpDependInfoRD =
4752       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4753   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4754   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4755   {
4756     OMPIteratorGeneratorScope IteratorScope(
4757         CGF, cast_or_null<OMPIteratorExpr>(
4758                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4759                                    : nullptr));
4760     for (const Expr *E : Data.DepExprs) {
4761       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4762       LValue Base = CGF.EmitLoadOfPointerLValue(
4763           DepobjLVal.getAddress(CGF),
4764           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4765       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4766           Base.getAddress(CGF), KmpDependInfoPtrT);
4767       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4768                                 Base.getTBAAInfo());
4769       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4770           Addr.getElementType(), Addr.getPointer(),
4771           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4772       LValue NumDepsBase = CGF.MakeAddrLValue(
4773           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4774           Base.getBaseInfo(), Base.getTBAAInfo());
4775       // NumDeps = deps[i].base_addr;
4776       LValue BaseAddrLVal = CGF.EmitLValueForField(
4777           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4778       llvm::Value *NumDeps =
4779           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4780       LValue NumLVal = CGF.MakeAddrLValue(
4781           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4782           C.getUIntPtrType());
4783       CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4784                               NumLVal.getAddress(CGF));
4785       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4786       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4787       CGF.EmitStoreOfScalar(Add, NumLVal);
4788       SizeLVals.push_back(NumLVal);
4789     }
4790   }
4791   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4792     llvm::Value *Size =
4793         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4794     Sizes.push_back(Size);
4795   }
4796   return Sizes;
4797 }
4798 
/// Copies the dependency records referenced by the depobj expressions in
/// \p Data into \p DependenciesArray at the runtime position \p PosLVal,
/// advancing the position by the number of records copied per depobj.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  // Byte size of one kmp_depend_info record, for the memcpy below.
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    // Emit the copies inside the iterator loops when an iterator modifier is
    // present.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      // Load the depobj handle and retype it as a pointer to its
      // kmp_depend_info record array.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      // The element at index -1 is a header whose base_addr field stores the
      // record count.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getElementType(), Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
                                        DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4860 
4861 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4862     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4863     SourceLocation Loc) {
4864   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4865         return D.DepExprs.empty();
4866       }))
4867     return std::make_pair(nullptr, Address::invalid());
4868   // Process list of dependencies.
4869   ASTContext &C = CGM.getContext();
4870   Address DependenciesArray = Address::invalid();
4871   llvm::Value *NumOfElements = nullptr;
4872   unsigned NumDependencies = std::accumulate(
4873       Dependencies.begin(), Dependencies.end(), 0,
4874       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4875         return D.DepKind == OMPC_DEPEND_depobj
4876                    ? V
4877                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4878       });
4879   QualType FlagsTy;
4880   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4881   bool HasDepobjDeps = false;
4882   bool HasRegularWithIterators = false;
4883   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4884   llvm::Value *NumOfRegularWithIterators =
4885       llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4886   // Calculate number of depobj dependecies and regular deps with the iterators.
4887   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4888     if (D.DepKind == OMPC_DEPEND_depobj) {
4889       SmallVector<llvm::Value *, 4> Sizes =
4890           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4891       for (llvm::Value *Size : Sizes) {
4892         NumOfDepobjElements =
4893             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4894       }
4895       HasDepobjDeps = true;
4896       continue;
4897     }
4898     // Include number of iterations, if any.
4899 
4900     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4901       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4902         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4903         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4904         llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4905             Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4906         NumOfRegularWithIterators =
4907             CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4908       }
4909       HasRegularWithIterators = true;
4910       continue;
4911     }
4912   }
4913 
4914   QualType KmpDependInfoArrayTy;
4915   if (HasDepobjDeps || HasRegularWithIterators) {
4916     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4917                                            /*isSigned=*/false);
4918     if (HasDepobjDeps) {
4919       NumOfElements =
4920           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4921     }
4922     if (HasRegularWithIterators) {
4923       NumOfElements =
4924           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4925     }
4926     auto *OVE = new (C) OpaqueValueExpr(
4927         Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4928         VK_PRValue);
4929     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4930                                                   RValue::get(NumOfElements));
4931     KmpDependInfoArrayTy =
4932         C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
4933                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4934     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4935     // Properly emit variable-sized array.
4936     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4937                                          ImplicitParamDecl::Other);
4938     CGF.EmitVarDecl(*PD);
4939     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4940     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4941                                               /*isSigned=*/false);
4942   } else {
4943     KmpDependInfoArrayTy = C.getConstantArrayType(
4944         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4945         ArrayType::Normal, /*IndexTypeQuals=*/0);
4946     DependenciesArray =
4947         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4948     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4949     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4950                                            /*isSigned=*/false);
4951   }
4952   unsigned Pos = 0;
4953   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4954     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4955         Dependencies[I].IteratorExpr)
4956       continue;
4957     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4958                    DependenciesArray);
4959   }
4960   // Copy regular dependecies with iterators.
4961   LValue PosLVal = CGF.MakeAddrLValue(
4962       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4963   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4964   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4965     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4966         !Dependencies[I].IteratorExpr)
4967       continue;
4968     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4969                    DependenciesArray);
4970   }
4971   // Copy final depobj arrays without iterators.
4972   if (HasDepobjDeps) {
4973     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4974       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4975         continue;
4976       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4977                          DependenciesArray);
4978     }
4979   }
4980   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4981       DependenciesArray, CGF.VoidPtrTy);
4982   return std::make_pair(NumOfElements, DependenciesArray);
4983 }
4984 
/// Emits the dependency array for an 'omp depobj' construct. The storage is
/// heap-allocated via __kmpc_alloc with one extra leading element whose
/// base_addr field records the number of dependencies (used later by depobj
/// update/destroy). Returns the address of the first real record (one past
/// the header), cast to void*.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // With an iterator modifier the record count is the product of the
    // iterator trip counts, only known at runtime.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // Allocation size = (count + 1 header element) * aligned record size.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Statically known count: size of kmp_depend_info[NumDependencies + 1].
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Fill records starting at index 1 (index 0 is the header). With an
  // iterator modifier the position must be tracked at runtime.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return the address of the first real record, past the header element.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}
5067 
5068 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5069                                         SourceLocation Loc) {
5070   ASTContext &C = CGM.getContext();
5071   QualType FlagsTy;
5072   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5073   LValue Base = CGF.EmitLoadOfPointerLValue(
5074       DepobjLVal.getAddress(CGF),
5075       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5076   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5077   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5078       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5079   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5080       Addr.getElementType(), Addr.getPointer(),
5081       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5082   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5083                                                                CGF.VoidPtrTy);
5084   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5085   // Use default allocator.
5086   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5087   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5088 
5089   // _kmpc_free(gtid, addr, nullptr);
5090   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5091                                 CGM.getModule(), OMPRTL___kmpc_free),
5092                             Args);
5093 }
5094 
/// Implements 'omp depobj(x) update(kind)': rewrites the flags field of
/// every record in the depobj's dependency array with the new dependence
/// kind.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  // Fetch the record array and its runtime length from the depobj.
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // The current element is a PHI of the array start (from the entry block)
  // and the incremented pointer from the previous iteration.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  // Loop until the incremented pointer reaches the end of the array.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5141 
/// Emits code for an 'omp task' directive: allocates the task via the
/// runtime, registers its dependences, and either hands it to the runtime
/// (deferred) or, when the 'if' clause evaluates to false, executes it
/// immediately between __kmpc_omp_task_begin_if0/_complete_if0.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  // Allocate and initialize the kmp_task_t object and its outlined entry.
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'if' clause true (or absent): enqueue the task with the runtime for
  // deferred execution.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks start executing at part_id 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // 'if' clause false: wait for the dependences, then run the task body
  // immediately (undeferred) on the encountering thread.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5259 
/// Emits code for an 'omp taskloop' directive: allocates the task object,
/// fills in its loop bounds/stride/reduction fields, and calls
/// __kmpc_taskloop.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // 'if' clause value; defaults to 1 when the clause is absent.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the task's lower-bound, upper-bound and stride fields from the
  // loop-bound variables' initializers.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Values for the 'sched' argument of __kmpc_taskloop, selected below from
  // the grainsize/num_tasks clause data.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
5345 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr Optional 'x' part of an atomic update; forwarded unchanged to
/// \p RedOpGen for every element.
/// \param EExpr Optional 'expr' part of an atomic update; forwarded unchanged
/// to \p RedOpGen for every element.
/// \param UpExpr Optional full update expression; forwarded unchanged to
/// \p RedOpGen for every element.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop body entirely when the array has no elements.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // Two PHIs track the current source (RHS) and destination (LHS) element
  // pointers; both arrays are walked in lockstep.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current element addresses so the
  // generated reduction operation acts on a single pair of elements.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5428 
5429 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5430 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5431 /// UDR combiner function.
5432 static void emitReductionCombiner(CodeGenFunction &CGF,
5433                                   const Expr *ReductionOp) {
5434   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5435     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5436       if (const auto *DRE =
5437               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5438         if (const auto *DRD =
5439                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5440           std::pair<llvm::Function *, llvm::Function *> Reduction =
5441               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5442           RValue Func = RValue::get(Reduction.first);
5443           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5444           CGF.EmitIgnoredExpr(ReductionOp);
5445           return;
5446         }
5447   CGF.EmitIgnoredExpr(ReductionOp);
5448 }
5449 
/// Emits the outlined reduction function
/// \code
/// void reduction_func(void *LHSArg, void *RHSArg) {
///   *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
/// }
/// \endcode
/// where lhs/rhs are void*[n] arrays passed through LHSArg/RHSArg and each
/// element of ReductionOps is applied to the corresponding slot pair.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap each LHS/RHS reduction variable onto the corresponding slot of the
  // pointer arrays so the combiner expressions operate on the arguments.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // A variably-modified private occupies an extra slot holding its
      // dynamic size; consume it and materialize the VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5541 
5542 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5543                                                   const Expr *ReductionOp,
5544                                                   const Expr *PrivateRef,
5545                                                   const DeclRefExpr *LHS,
5546                                                   const DeclRefExpr *RHS) {
5547   if (PrivateRef->getType()->isArrayType()) {
5548     // Emit reduction for array section.
5549     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5550     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5551     EmitOMPAggregateReduction(
5552         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5553         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5554           emitReductionCombiner(CGF, ReductionOp);
5555         });
5556   } else {
5557     // Emit reduction for array subscript or single variable.
5558     emitReductionCombiner(CGF, ReductionOp);
5559   }
5560 }
5561 
/// Emits finalization code for OpenMP 'reduction' clauses: either inlined
/// combiners (when Options.SimpleReduction is set) or the full
/// __kmpc_reduce{_nowait} protocol described in the comment below, with a
/// tree-reduction path (case 1) and an atomic fallback path (case 2).
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  // Nothing to emit if there is no valid insertion point.
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // Inline the combiners directly; no runtime calls are needed.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  // Wrap the combiners so __kmpc_end_reduce{_nowait} is emitted on exit from
  // the region.
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Recognize reduction ops of the form 'x = <update-expr>' so the
      // update can be attempted as a simple atomic operation below.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // The fallback path for a non-trivial update: store the old
                // value of x into a temporary remapped to VD and re-emit the
                // full update expression against it.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5868 
5869 /// Generates unique name for artificial threadprivate variables.
5870 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5871 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5872                                       const Expr *Ref) {
5873   SmallString<256> Buffer;
5874   llvm::raw_svector_ostream Out(Buffer);
5875   const clang::DeclRefExpr *DE;
5876   const VarDecl *D = ::getBaseDecl(Ref, DE);
5877   if (!D)
5878     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5879   D = D->getCanonicalDecl();
5880   std::string Name = CGM.getOpenMPRuntime().getName(
5881       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5882   Out << Prefix << Name << "_"
5883       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5884   return std::string(Out.str());
5885 }
5886 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
/// \param RCG Reduction codegen helper describing the reduction items.
/// \param N Index of the reduction item this function initializes.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // The two void* parameters are marked restrict.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // The original item is not needed; pass a null pointer lvalue.
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5955 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
/// \param RCG Reduction codegen helper describing the reduction items.
/// \param N Index of the reduction item this function combines.
/// \param ReductionOp Combiner expression in form 'LHS binop RHS'.
/// \param LHS Reference expression for the in/out (accumulator) operand.
/// \param RHS Reference expression for the in (incoming) operand.
/// \param PrivateRef Private copy expression for the reduction item.
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
6033 
6034 /// Emits reduction finalizer function:
6035 /// \code
6036 /// void @.red_fini(void* %arg) {
6037 /// %0 = bitcast void* %arg to <type>*
6038 /// <destroy>(<type>* %0)
6039 /// ret void
6040 /// }
6041 /// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No finalizer is required if the N-th reduction item needs no cleanups.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  // Build the signature: void @.red_fini(void* %arg).
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %0 = bitcast void* %arg to <type>* — address of the private copy.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  // Materialize the (possibly VLA) aggregate type before emitting cleanups.
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
6082 
/// Emits the runtime setup for task reductions: builds an array of
/// kmp_taskred_input_t descriptors (one per reduction item, each holding the
/// shared/original addresses, the item size, and the generated
/// init/fini/comb helper functions) and passes it to
/// __kmpc_taskred_modifier_init (reductions with a task modifier) or
/// __kmpc_taskred_init. Returns the opaque pointer produced by the runtime
/// call, or nullptr if there are no reduction variables.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill one descriptor per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    // The finalizer may be null if the item needs no cleanups.
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    // A non-zero flag marks items that use delayed creation (see above).
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6211 
6212 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6213                                             SourceLocation Loc,
6214                                             bool IsWorksharingReduction) {
6215   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6216   // is_ws, int num, void *data);
6217   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6218   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6219                                                 CGM.IntTy, /*isSigned=*/true);
6220   llvm::Value *Args[] = {IdentTLoc, GTid,
6221                          llvm::ConstantInt::get(CGM.IntTy,
6222                                                 IsWorksharingReduction ? 1 : 0,
6223                                                 /*isSigned=*/true)};
6224   (void)CGF.EmitRuntimeCall(
6225       OMPBuilder.getOrCreateRuntimeFunction(
6226           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6227       Args);
6228 }
6229 
6230 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6231                                               SourceLocation Loc,
6232                                               ReductionCodeGen &RCG,
6233                                               unsigned N) {
6234   auto Sizes = RCG.getSizes(N);
6235   // Emit threadprivate global variable if the type is non-constant
6236   // (Sizes.second = nullptr).
6237   if (Sizes.second) {
6238     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6239                                                      /*isSigned=*/false);
6240     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6241         CGF, CGM.getContext().getSizeType(),
6242         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6243     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6244   }
6245 }
6246 
6247 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6248                                               SourceLocation Loc,
6249                                               llvm::Value *ReductionsPtr,
6250                                               LValue SharedLVal) {
6251   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6252   // *d);
6253   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6254                                                    CGM.IntTy,
6255                                                    /*isSigned=*/true),
6256                          ReductionsPtr,
6257                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6258                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6259   return Address(
6260       CGF.EmitRuntimeCall(
6261           OMPBuilder.getOrCreateRuntimeFunction(
6262               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6263           Args),
6264       SharedLVal.getAlignment());
6265 }
6266 
6267 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6268                                        SourceLocation Loc) {
6269   if (!CGF.HaveInsertPoint())
6270     return;
6271 
6272   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6273     OMPBuilder.createTaskwait(CGF.Builder);
6274   } else {
6275     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6276     // global_tid);
6277     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6278     // Ignore return result until untied tasks are supported.
6279     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6280                             CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6281                         Args);
6282   }
6283 
6284   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6285     Region->emitUntiedSwitch(CGF);
6286 }
6287 
6288 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6289                                            OpenMPDirectiveKind InnerKind,
6290                                            const RegionCodeGenTy &CodeGen,
6291                                            bool HasCancel) {
6292   if (!CGF.HaveInsertPoint())
6293     return;
6294   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6295                                  InnerKind != OMPD_critical &&
6296                                      InnerKind != OMPD_master &&
6297                                      InnerKind != OMPD_masked);
6298   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6299 }
6300 
namespace {
/// Cancellation kind codes passed as the 'cncl_kind' argument to the
/// __kmpc_cancel and __kmpc_cancellationpoint runtime entry points.
enum RTCancelKind {
  CancelNoreq = 0,     // No cancellation requested.
  CancelParallel = 1,  // Cancellation of a 'parallel' region.
  CancelLoop = 2,      // Cancellation of a worksharing loop ('for').
  CancelSections = 3,  // Cancellation of a 'sections' region.
  CancelTaskgroup = 4  // Cancellation of a 'taskgroup' region.
};
} // anonymous namespace
6310 
6311 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6312   RTCancelKind CancelKind = CancelNoreq;
6313   if (CancelRegion == OMPD_parallel)
6314     CancelKind = CancelParallel;
6315   else if (CancelRegion == OMPD_for)
6316     CancelKind = CancelLoop;
6317   else if (CancelRegion == OMPD_sections)
6318     CancelKind = CancelSections;
6319   else {
6320     assert(CancelRegion == OMPD_taskgroup);
6321     CancelKind = CancelTaskgroup;
6322   }
6323   return CancelKind;
6324 }
6325 
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      // Branch to the exit block when the runtime reports a pending cancel.
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6365 
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // The actual emission is done by this lambda so it can be wrapped by the
    // 'if' clause handling below.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      // Branch to the exit block when the runtime reports a pending cancel.
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // Guard the cancel with the 'if' clause condition; the else branch
      // does nothing.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6411 
6412 namespace {
6413 /// Cleanup action for uses_allocators support.
6414 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6415   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6416 
6417 public:
6418   OMPUsesAllocatorsActionTy(
6419       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6420       : Allocators(Allocators) {}
6421   void Enter(CodeGenFunction &CGF) override {
6422     if (!CGF.HaveInsertPoint())
6423       return;
6424     for (const auto &AllocatorData : Allocators) {
6425       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6426           CGF, AllocatorData.first, AllocatorData.second);
6427     }
6428   }
6429   void Exit(CodeGenFunction &CGF) override {
6430     if (!CGF.HaveInsertPoint())
6431       return;
6432     for (const auto &AllocatorData : Allocators) {
6433       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6434                                                         AllocatorData.first);
6435     }
6436   }
6437 };
6438 } // namespace
6439 
6440 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6441     const OMPExecutableDirective &D, StringRef ParentName,
6442     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6443     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6444   assert(!ParentName.empty() && "Invalid target region parent name!");
6445   HasEmittedTargetRegion = true;
6446   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6447   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6448     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6449       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6450       if (!D.AllocatorTraits)
6451         continue;
6452       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6453     }
6454   }
6455   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6456   CodeGen.setAction(UsesAllocatorAction);
6457   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6458                                    IsOffloadEntry, CodeGen);
6459 }
6460 
/// Emits a call to __kmpc_init_allocator for an allocator declared in a
/// 'uses_allocators' clause and stores the returned handle into the
/// allocator variable.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Number of traits is the extent of the constant array of traits.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  // Reinterpret the address of the traits array as void** for the runtime
  // call.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  // Emit the allocator variable, then store the runtime handle (converted to
  // the variable's type) into it.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6495 
6496 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6497                                              const Expr *Allocator) {
6498   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6499   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6500   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6501   llvm::Value *AllocatorVal =
6502       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6503   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6504                                           CGF.getContext().VoidPtrTy,
6505                                           Allocator->getExprLoc());
6506   (void)CGF.EmitRuntimeCall(
6507       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6508                                             OMPRTL___kmpc_destroy_allocator),
6509       {ThreadId, AllocatorVal});
6510 }
6511 
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the captured target region into a function with the name built
  // above.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    // AMDGCN entry points must use the kernel calling convention.
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // On the host the ID is a unique dummy global.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);

  // Add NumTeams and ThreadLimit attributes to the outlined GPU function
  int32_t DefaultValTeams = -1;
  getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
  if (DefaultValTeams > 0) {
    OutlinedFn->addFnAttr("omp_target_num_teams",
                          std::to_string(DefaultValTeams));
  }
  int32_t DefaultValThreads = -1;
  getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
  if (DefaultValThreads > 0) {
    OutlinedFn->addFnAttr("omp_target_thread_limit",
                          std::to_string(DefaultValThreads));
  }
}
6594 
6595 /// Checks if the expression is constant or does not have non-trivial function
6596 /// calls.
6597 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6598   // We can skip constant expressions.
6599   // We can skip expressions with trivial calls or simple expressions.
6600   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6601           !E->hasNonTrivialCall(Ctx)) &&
6602          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6603 }
6604 
/// Peels compound statements and ignorable statements (trivial expressions,
/// asm/null statements, flush/barrier/taskyield directives, and declarations
/// of unused or global-storage variables) to find the single meaningful child
/// statement of \p Body. Returns nullptr if there is more than one such
/// child.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // A DeclStmt is ignorable if every declaration in it is one of the
        // listed non-variable kinds, or a variable that is unused or has
        // global storage.
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Keep descending into nested compound statements.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6646 
6647 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6648     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6649     int32_t &DefaultVal) {
6650 
6651   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6652   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6653          "Expected target-based executable directive.");
6654   switch (DirectiveKind) {
6655   case OMPD_target: {
6656     const auto *CS = D.getInnermostCapturedStmt();
6657     const auto *Body =
6658         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6659     const Stmt *ChildStmt =
6660         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6661     if (const auto *NestedDir =
6662             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6663       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6664         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6665           const Expr *NumTeams =
6666               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6667           if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6668             if (auto Constant =
6669                     NumTeams->getIntegerConstantExpr(CGF.getContext()))
6670               DefaultVal = Constant->getExtValue();
6671           return NumTeams;
6672         }
6673         DefaultVal = 0;
6674         return nullptr;
6675       }
6676       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6677           isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
6678         DefaultVal = 1;
6679         return nullptr;
6680       }
6681       DefaultVal = 1;
6682       return nullptr;
6683     }
6684     // A value of -1 is used to check if we need to emit no teams region
6685     DefaultVal = -1;
6686     return nullptr;
6687   }
6688   case OMPD_target_teams:
6689   case OMPD_target_teams_distribute:
6690   case OMPD_target_teams_distribute_simd:
6691   case OMPD_target_teams_distribute_parallel_for:
6692   case OMPD_target_teams_distribute_parallel_for_simd: {
6693     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6694       const Expr *NumTeams =
6695           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6696       if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6697         if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6698           DefaultVal = Constant->getExtValue();
6699       return NumTeams;
6700     }
6701     DefaultVal = 0;
6702     return nullptr;
6703   }
6704   case OMPD_target_parallel:
6705   case OMPD_target_parallel_for:
6706   case OMPD_target_parallel_for_simd:
6707   case OMPD_target_simd:
6708     DefaultVal = 1;
6709     return nullptr;
6710   case OMPD_parallel:
6711   case OMPD_for:
6712   case OMPD_parallel_for:
6713   case OMPD_parallel_master:
6714   case OMPD_parallel_sections:
6715   case OMPD_for_simd:
6716   case OMPD_parallel_for_simd:
6717   case OMPD_cancel:
6718   case OMPD_cancellation_point:
6719   case OMPD_ordered:
6720   case OMPD_threadprivate:
6721   case OMPD_allocate:
6722   case OMPD_task:
6723   case OMPD_simd:
6724   case OMPD_tile:
6725   case OMPD_unroll:
6726   case OMPD_sections:
6727   case OMPD_section:
6728   case OMPD_single:
6729   case OMPD_master:
6730   case OMPD_critical:
6731   case OMPD_taskyield:
6732   case OMPD_barrier:
6733   case OMPD_taskwait:
6734   case OMPD_taskgroup:
6735   case OMPD_atomic:
6736   case OMPD_flush:
6737   case OMPD_depobj:
6738   case OMPD_scan:
6739   case OMPD_teams:
6740   case OMPD_target_data:
6741   case OMPD_target_exit_data:
6742   case OMPD_target_enter_data:
6743   case OMPD_distribute:
6744   case OMPD_distribute_simd:
6745   case OMPD_distribute_parallel_for:
6746   case OMPD_distribute_parallel_for_simd:
6747   case OMPD_teams_distribute:
6748   case OMPD_teams_distribute_simd:
6749   case OMPD_teams_distribute_parallel_for:
6750   case OMPD_teams_distribute_parallel_for_simd:
6751   case OMPD_target_update:
6752   case OMPD_declare_simd:
6753   case OMPD_declare_variant:
6754   case OMPD_begin_declare_variant:
6755   case OMPD_end_declare_variant:
6756   case OMPD_declare_target:
6757   case OMPD_end_declare_target:
6758   case OMPD_declare_reduction:
6759   case OMPD_declare_mapper:
6760   case OMPD_taskloop:
6761   case OMPD_taskloop_simd:
6762   case OMPD_master_taskloop:
6763   case OMPD_master_taskloop_simd:
6764   case OMPD_parallel_master_taskloop:
6765   case OMPD_parallel_master_taskloop_simd:
6766   case OMPD_requires:
6767   case OMPD_metadirective:
6768   case OMPD_unknown:
6769     break;
6770   default:
6771     break;
6772   }
6773   llvm_unreachable("Unexpected directive kind.");
6774 }
6775 
6776 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6777     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6778   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6779          "Clauses associated with the teams directive expected to be emitted "
6780          "only for the host!");
6781   CGBuilderTy &Bld = CGF.Builder;
6782   int32_t DefaultNT = -1;
6783   const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
6784   if (NumTeams != nullptr) {
6785     OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6786 
6787     switch (DirectiveKind) {
6788     case OMPD_target: {
6789       const auto *CS = D.getInnermostCapturedStmt();
6790       CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6791       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6792       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6793                                                   /*IgnoreResultAssign*/ true);
6794       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6795                              /*isSigned=*/true);
6796     }
6797     case OMPD_target_teams:
6798     case OMPD_target_teams_distribute:
6799     case OMPD_target_teams_distribute_simd:
6800     case OMPD_target_teams_distribute_parallel_for:
6801     case OMPD_target_teams_distribute_parallel_for_simd: {
6802       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6803       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6804                                                   /*IgnoreResultAssign*/ true);
6805       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6806                              /*isSigned=*/true);
6807     }
6808     default:
6809       break;
6810     }
6811   } else if (DefaultNT == -1) {
6812     return nullptr;
6813   }
6814 
6815   return Bld.getInt32(DefaultNT);
6816 }
6817 
/// Compute the number of threads implied by the (single) directive nested in
/// the region captured by \p CS.  For a nested parallel region this evaluates
/// the 'if' and 'num_threads' clauses; a nested simd region yields 1.  When
/// nothing better can be derived, \p DefaultThreadLimitVal (which may be
/// null) or a literal 0 (meaning "runtime default") is returned.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Pick the 'if' clause that applies to 'parallel': either no name
        // modifier or an explicit 'parallel' modifier.
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // A statically false condition serializes the region: 1 thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit any pre-init declarations the condition depends on before
            // evaluating it.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Emit any pre-init declarations the num_threads expression needs.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp num_threads by the enclosing thread limit, if one was given
        // (unsigned comparison, matching the cast above).
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        // No num_threads clause: fall back to the thread limit, or 0 to let
        // the runtime choose.
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A simd region is executed by a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  // No single nested directive found: use the thread limit if known,
  // otherwise 0 (runtime default).
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6909 
6910 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6911     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6912     int32_t &DefaultVal) {
6913   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6914   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6915          "Expected target-based executable directive.");
6916 
6917   switch (DirectiveKind) {
6918   case OMPD_target:
6919     // Teams have no clause thread_limit
6920     return nullptr;
6921   case OMPD_target_teams:
6922   case OMPD_target_teams_distribute:
6923     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6924       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6925       const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
6926       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6927         if (auto Constant =
6928                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6929           DefaultVal = Constant->getExtValue();
6930       return ThreadLimit;
6931     }
6932     return nullptr;
6933   case OMPD_target_parallel:
6934   case OMPD_target_parallel_for:
6935   case OMPD_target_parallel_for_simd:
6936   case OMPD_target_teams_distribute_parallel_for:
6937   case OMPD_target_teams_distribute_parallel_for_simd: {
6938     Expr *ThreadLimit = nullptr;
6939     Expr *NumThreads = nullptr;
6940     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6941       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6942       ThreadLimit = ThreadLimitClause->getThreadLimit();
6943       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6944         if (auto Constant =
6945                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6946           DefaultVal = Constant->getExtValue();
6947     }
6948     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6949       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6950       NumThreads = NumThreadsClause->getNumThreads();
6951       if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
6952         if (auto Constant =
6953                 NumThreads->getIntegerConstantExpr(CGF.getContext())) {
6954           if (Constant->getExtValue() < DefaultVal) {
6955             DefaultVal = Constant->getExtValue();
6956             ThreadLimit = NumThreads;
6957           }
6958         }
6959       }
6960     }
6961     return ThreadLimit;
6962   }
6963   case OMPD_target_teams_distribute_simd:
6964   case OMPD_target_simd:
6965     DefaultVal = 1;
6966     return nullptr;
6967   case OMPD_parallel:
6968   case OMPD_for:
6969   case OMPD_parallel_for:
6970   case OMPD_parallel_master:
6971   case OMPD_parallel_sections:
6972   case OMPD_for_simd:
6973   case OMPD_parallel_for_simd:
6974   case OMPD_cancel:
6975   case OMPD_cancellation_point:
6976   case OMPD_ordered:
6977   case OMPD_threadprivate:
6978   case OMPD_allocate:
6979   case OMPD_task:
6980   case OMPD_simd:
6981   case OMPD_tile:
6982   case OMPD_unroll:
6983   case OMPD_sections:
6984   case OMPD_section:
6985   case OMPD_single:
6986   case OMPD_master:
6987   case OMPD_critical:
6988   case OMPD_taskyield:
6989   case OMPD_barrier:
6990   case OMPD_taskwait:
6991   case OMPD_taskgroup:
6992   case OMPD_atomic:
6993   case OMPD_flush:
6994   case OMPD_depobj:
6995   case OMPD_scan:
6996   case OMPD_teams:
6997   case OMPD_target_data:
6998   case OMPD_target_exit_data:
6999   case OMPD_target_enter_data:
7000   case OMPD_distribute:
7001   case OMPD_distribute_simd:
7002   case OMPD_distribute_parallel_for:
7003   case OMPD_distribute_parallel_for_simd:
7004   case OMPD_teams_distribute:
7005   case OMPD_teams_distribute_simd:
7006   case OMPD_teams_distribute_parallel_for:
7007   case OMPD_teams_distribute_parallel_for_simd:
7008   case OMPD_target_update:
7009   case OMPD_declare_simd:
7010   case OMPD_declare_variant:
7011   case OMPD_begin_declare_variant:
7012   case OMPD_end_declare_variant:
7013   case OMPD_declare_target:
7014   case OMPD_end_declare_target:
7015   case OMPD_declare_reduction:
7016   case OMPD_declare_mapper:
7017   case OMPD_taskloop:
7018   case OMPD_taskloop_simd:
7019   case OMPD_master_taskloop:
7020   case OMPD_master_taskloop_simd:
7021   case OMPD_parallel_master_taskloop:
7022   case OMPD_parallel_master_taskloop_simd:
7023   case OMPD_requires:
7024   case OMPD_unknown:
7025     break;
7026   default:
7027     break;
7028   }
7029   llvm_unreachable("Unsupported directive kind.");
7030 }
7031 
/// Emit an i32 value holding the number of threads for target region \p D.
/// Host-side counterpart of getNumThreadsExprForTargetDirective: it actually
/// generates IR for the thread_limit/num_threads/if clause expressions found
/// on \p D or on a single nested directive.
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': derive the thread count from the nested directives.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // A thread_limit clause on the nested directive bounds the result.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit any pre-init declarations the clause expression depends on.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // Step into a nested teams (non-distribute) directive to inspect its
      // own single child.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // A simd region runs on a single thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    // Fall back to the thread limit, or 0 (runtime default).
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    // Look through a nested 'distribute' to find the parallel region.
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      // Select the 'if' clause that applies to 'parallel'.
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // A statically false condition serializes the region: 1 thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // min(num_threads, thread_limit) via unsigned compare-and-select.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // simd-only target regions run on a single thread.
    return Bld.getInt32(1);
  // The remaining kinds are not target-based executable directives; reaching
  // them here is a bug (guarded by the assertion above).
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7250 
7251 namespace {
7252 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7253 
7254 // Utility to handle information from clauses associated with a given
7255 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7256 // It provides a convenient interface to obtain the information and generate
7257 // code for that information.
7258 class MappableExprsHandler {
7259 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.  These values mirror the tgt_map_type flags expected by
  /// the offloading runtime; keep the numeric values in sync with it.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// Increment and decrement a separate reference counter so that the data
    /// cannot be unmapped within the associated region.  Thus, this flag is
    /// intended to be used on 'target' and 'target data' directives because
    /// they are inherently structured.  It is not intended to be used on
    /// 'target enter data' and 'target exit data' directives because they are
    /// inherently dynamic.
    /// This is an OpenMP extension for the sake of OpenACC support.
    OMP_MAP_OMPX_HOLD = 0x2000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.  See getFlagMemberOffset() for the field's bit offset.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7315 
7316   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7317   static unsigned getFlagMemberOffset() {
7318     unsigned Offset = 0;
7319     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7320          Remain = Remain >> 1)
7321       Offset++;
7322     return Offset;
7323   }
7324 
  /// Class that holds debugging information for a data mapping to be passed to
  /// the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    /// \param MapDecl the mapped declaration; \param MapExpr the originating
    /// map-clause expression (may be null for implicit mappings).
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };
7341 
  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    /// Access the wrapped base-pointer value.
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };
7358 
  /// Convenience aliases for the parallel arrays of map information that are
  /// handed to the offloading runtime.
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7366 
7367   /// This structure contains combined information generated for mappable
7368   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7369   /// mappers, and non-contiguous information.
7370   struct MapCombinedInfoTy {
7371     struct StructNonContiguousInfo {
7372       bool IsNonContiguous = false;
7373       MapDimArrayTy Dims;
7374       MapNonContiguousArrayTy Offsets;
7375       MapNonContiguousArrayTy Counts;
7376       MapNonContiguousArrayTy Strides;
7377     };
7378     MapExprsArrayTy Exprs;
7379     MapBaseValuesArrayTy BasePointers;
7380     MapValuesArrayTy Pointers;
7381     MapValuesArrayTy Sizes;
7382     MapFlagsArrayTy Types;
7383     MapMappersArrayTy Mappers;
7384     StructNonContiguousInfo NonContigInfo;
7385 
7386     /// Append arrays in \a CurInfo.
7387     void append(MapCombinedInfoTy &CurInfo) {
7388       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7389       BasePointers.append(CurInfo.BasePointers.begin(),
7390                           CurInfo.BasePointers.end());
7391       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7392       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7393       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7394       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7395       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7396                                  CurInfo.NonContigInfo.Dims.end());
7397       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7398                                     CurInfo.NonContigInfo.Offsets.end());
7399       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7400                                    CurInfo.NonContigInfo.Counts.end());
7401       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7402                                     CurInfo.NonContigInfo.Strides.end());
7403     }
7404   };
7405 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    // Map info gathered for the struct's members before the combined entry
    // for the struct itself is emitted. NOTE(review): exact emission order is
    // decided at the use sites — confirm there.
    MapCombinedInfoTy PreliminaryMapData;
    /// Field index and address of the lowest mapped element of the struct.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Field index and address of the highest mapped element of the struct.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Address of the struct itself.
    Address Base = Address::invalid();
    // Lower-bound address of the mapped range — presumably the start of the
    // combined entry; verify against the code that fills it in.
    Address LB = Address::invalid();
    /// Whether the highest element is an array section.
    bool IsArraySection = false;
    /// Whether the complete record has been mapped.
    bool HasCompleteRecord = false;
  };
7421 
7422 private:
  /// Information gathered for a single map-clause component list: the
  /// expression components plus the map type and modifiers that apply to them.
  /// (The previous comment here, about how a device pointer is returned,
  /// described a different entity.)
  struct MapInfo {
    /// Expression components this map entry refers to.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    /// Whether the device address of the entry has to be returned to the
    /// caller (use_device_ptr/use_device_addr handling).
    bool ReturnDevicePointer = false;
    /// Whether this map was generated implicitly rather than user-written.
    bool IsImplicit = false;
    /// User-defined mapper attached to this map, if any.
    const ValueDecl *Mapper = nullptr;
    /// Original variable-reference expression; may be null.
    const Expr *VarRef = nullptr;
    /// True for use_device_addr entries, false for use_device_ptr (see
    /// DeferredDevicePtrEntryTy).
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
7449 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// Expression associated with the deferred entry.
    const Expr *IE = nullptr;
    /// Declaration named by the use_device_ptr/use_device_addr clause.
    const ValueDecl *VD = nullptr;
    /// True for use_device_addr, false for use_device_ptr.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7462 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between lambda declarations and their map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7486 
  /// Return the number of bytes an expression maps, as a size_t-typed
  /// llvm::Value. Array shaping expressions and array sections get a size
  /// computed from their dimensions/length; every other expression uses the
  /// size of its canonical (non-reference) type.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      // Size = sizeof(pointee) * dim0 * dim1 * ... with nuw multiplies.
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      // Element size: pointee size for pointer bases, element size for array
      // bases.
      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        // Size = length * sizeof(element).
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Select 0 when lb*elemsize exceeds the base size so the subtraction
      // cannot underflow.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
7561 
7562   /// Return the corresponding bits for a given map clause modifier. Add
7563   /// a flag marking the map as a pointer if requested. Add a flag marking the
7564   /// map as the first one of a series of maps that relate to the same map
7565   /// expression.
7566   OpenMPOffloadMappingFlags getMapTypeBits(
7567       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7568       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7569       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7570     OpenMPOffloadMappingFlags Bits =
7571         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7572     switch (MapType) {
7573     case OMPC_MAP_alloc:
7574     case OMPC_MAP_release:
7575       // alloc and release is the default behavior in the runtime library,  i.e.
7576       // if we don't pass any bits alloc/release that is what the runtime is
7577       // going to do. Therefore, we don't need to signal anything for these two
7578       // type modifiers.
7579       break;
7580     case OMPC_MAP_to:
7581       Bits |= OMP_MAP_TO;
7582       break;
7583     case OMPC_MAP_from:
7584       Bits |= OMP_MAP_FROM;
7585       break;
7586     case OMPC_MAP_tofrom:
7587       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7588       break;
7589     case OMPC_MAP_delete:
7590       Bits |= OMP_MAP_DELETE;
7591       break;
7592     case OMPC_MAP_unknown:
7593       llvm_unreachable("Unexpected map type!");
7594     }
7595     if (AddPtrFlag)
7596       Bits |= OMP_MAP_PTR_AND_OBJ;
7597     if (AddIsTargetParamFlag)
7598       Bits |= OMP_MAP_TARGET_PARAM;
7599     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7600       Bits |= OMP_MAP_ALWAYS;
7601     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7602       Bits |= OMP_MAP_CLOSE;
7603     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7604         llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7605       Bits |= OMP_MAP_PRESENT;
7606     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7607       Bits |= OMP_MAP_OMPX_HOLD;
7608     if (IsNonContiguous)
7609       Bits |= OMP_MAP_NON_CONTIG;
7610     return Bits;
7611   }
7612 
7613   /// Return true if the provided expression is a final array section. A
7614   /// final array section, is one whose length can't be proved to be one.
7615   bool isFinalArraySectionExpression(const Expr *E) const {
7616     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7617 
7618     // It is not an array section and therefore not a unity-size one.
7619     if (!OASE)
7620       return false;
7621 
7622     // An array section with no colon always refer to a single element.
7623     if (OASE->getColonLocFirst().isInvalid())
7624       return false;
7625 
7626     const Expr *Length = OASE->getLength();
7627 
7628     // If we don't have a length we have to check if the array has size 1
7629     // for this dimension. Also, we should always expect a length if the
7630     // base type is pointer.
7631     if (!Length) {
7632       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7633                              OASE->getBase()->IgnoreParenImpCasts())
7634                              .getCanonicalType();
7635       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7636         return ATy->getSize().getSExtValue() != 1;
7637       // If we don't have a constant dimension length, we have to consider
7638       // the current section as having any size, so it is not necessarily
7639       // unitary. If it happen to be unity size, that's user fault.
7640       return true;
7641     }
7642 
7643     // Check if the length evaluates to 1.
7644     Expr::EvalResult Result;
7645     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7646       return true; // Can have more that size 1.
7647 
7648     llvm::APSInt ConstLength = Result.Val.getInt();
7649     return ConstLength.getSExtValue() != 1;
7650   }
7651 
7652   /// Generate the base pointers, section pointers, sizes, map type bits, and
7653   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7654   /// map type, map or motion modifiers, and expression components.
7655   /// \a IsFirstComponent should be set to true if the provided set of
7656   /// components is the first associated with a capture.
7657   void generateInfoForComponentList(
7658       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7659       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7660       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7661       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7662       bool IsFirstComponentList, bool IsImplicit,
7663       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7664       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7665       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7666           OverlappedElements = llvm::None) const {
7667     // The following summarizes what has to be generated for each map and the
7668     // types below. The generated information is expressed in this order:
7669     // base pointer, section pointer, size, flags
7670     // (to add to the ones that come from the map type and modifier).
7671     //
7672     // double d;
7673     // int i[100];
7674     // float *p;
7675     //
7676     // struct S1 {
7677     //   int i;
7678     //   float f[50];
7679     // }
7680     // struct S2 {
7681     //   int i;
7682     //   float f[50];
7683     //   S1 s;
7684     //   double *p;
7685     //   struct S2 *ps;
7686     //   int &ref;
7687     // }
7688     // S2 s;
7689     // S2 *ps;
7690     //
7691     // map(d)
7692     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7693     //
7694     // map(i)
7695     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7696     //
7697     // map(i[1:23])
7698     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7699     //
7700     // map(p)
7701     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7702     //
7703     // map(p[1:24])
7704     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7705     // in unified shared memory mode or for local pointers
7706     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7707     //
7708     // map(s)
7709     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7710     //
7711     // map(s.i)
7712     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7713     //
7714     // map(s.s.f)
7715     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7716     //
7717     // map(s.p)
7718     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7719     //
7720     // map(to: s.p[:22])
7721     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7722     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7723     // &(s.p), &(s.p[0]), 22*sizeof(double),
7724     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7725     // (*) alloc space for struct members, only this is a target parameter
7726     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7727     //      optimizes this entry out, same in the examples below)
7728     // (***) map the pointee (map: to)
7729     //
7730     // map(to: s.ref)
7731     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7732     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7733     // (*) alloc space for struct members, only this is a target parameter
7734     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7735     //      optimizes this entry out, same in the examples below)
7736     // (***) map the pointee (map: to)
7737     //
7738     // map(s.ps)
7739     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7740     //
7741     // map(from: s.ps->s.i)
7742     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7743     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7744     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7745     //
7746     // map(to: s.ps->ps)
7747     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7748     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7749     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7750     //
7751     // map(s.ps->ps->ps)
7752     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7753     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7754     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7755     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7756     //
7757     // map(to: s.ps->ps->s.f[:22])
7758     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7759     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7760     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7761     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7762     //
7763     // map(ps)
7764     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7765     //
7766     // map(ps->i)
7767     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7768     //
7769     // map(ps->s.f)
7770     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7771     //
7772     // map(from: ps->p)
7773     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7774     //
7775     // map(to: ps->p[:22])
7776     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7777     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7778     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7779     //
7780     // map(ps->ps)
7781     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7782     //
7783     // map(from: ps->ps->s.i)
7784     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7785     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7786     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7787     //
7788     // map(from: ps->ps->ps)
7789     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7790     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7791     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7792     //
7793     // map(ps->ps->ps->ps)
7794     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7795     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7796     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7797     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7798     //
7799     // map(to: ps->ps->ps->s.f[:22])
7800     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7801     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7802     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7803     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7804     //
7805     // map(to: s.f[:22]) map(from: s.p[:33])
7806     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7807     //     sizeof(double*) (**), TARGET_PARAM
7808     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7809     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7810     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7811     // (*) allocate contiguous space needed to fit all mapped members even if
7812     //     we allocate space for members not mapped (in this example,
7813     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7814     //     them as well because they fall between &s.f[0] and &s.p)
7815     //
7816     // map(from: s.f[:22]) map(to: ps->p[:33])
7817     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7818     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7819     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7820     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7821     // (*) the struct this entry pertains to is the 2nd element in the list of
7822     //     arguments, hence MEMBER_OF(2)
7823     //
7824     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7825     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7826     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7827     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7828     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7829     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7830     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7831     // (*) the struct this entry pertains to is the 4th element in the list
7832     //     of arguments, hence MEMBER_OF(4)
7833 
7834     // Track if the map information being generated is the first for a capture.
7835     bool IsCaptureFirstInfo = IsFirstComponentList;
7836     // When the variable is on a declare target link or in a to clause with
7837     // unified memory, a reference is needed to hold the host/device address
7838     // of the variable.
7839     bool RequiresReference = false;
7840 
7841     // Scan the components from the base to the complete expression.
7842     auto CI = Components.rbegin();
7843     auto CE = Components.rend();
7844     auto I = CI;
7845 
7846     // Track if the map information being generated is the first for a list of
7847     // components.
7848     bool IsExpressionFirstInfo = true;
7849     bool FirstPointerInComplexData = false;
7850     Address BP = Address::invalid();
7851     const Expr *AssocExpr = I->getAssociatedExpression();
7852     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7853     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7854     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7855 
7856     if (isa<MemberExpr>(AssocExpr)) {
7857       // The base is the 'this' pointer. The content of the pointer is going
7858       // to be the base of the field being mapped.
7859       BP = CGF.LoadCXXThisAddress();
7860     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7861                (OASE &&
7862                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7863       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7864     } else if (OAShE &&
7865                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7866       BP = Address(
7867           CGF.EmitScalarExpr(OAShE->getBase()),
7868           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7869     } else {
7870       // The base is the reference to the variable.
7871       // BP = &Var.
7872       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7873       if (const auto *VD =
7874               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7875         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7876                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7877           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7878               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7879                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7880             RequiresReference = true;
7881             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7882           }
7883         }
7884       }
7885 
7886       // If the variable is a pointer and is being dereferenced (i.e. is not
7887       // the last component), the base has to be the pointer itself, not its
7888       // reference. References are ignored for mapping purposes.
7889       QualType Ty =
7890           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7891       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7892         // No need to generate individual map information for the pointer, it
7893         // can be associated with the combined storage if shared memory mode is
7894         // active or the base declaration is not global variable.
7895         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7896         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7897             !VD || VD->hasLocalStorage())
7898           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7899         else
7900           FirstPointerInComplexData = true;
7901         ++I;
7902       }
7903     }
7904 
7905     // Track whether a component of the list should be marked as MEMBER_OF some
7906     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7907     // in a component list should be marked as MEMBER_OF, all subsequent entries
7908     // do not belong to the base struct. E.g.
7909     // struct S2 s;
7910     // s.ps->ps->ps->f[:]
7911     //   (1) (2) (3) (4)
7912     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7913     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7914     // is the pointee of ps(2) which is not member of struct s, so it should not
7915     // be marked as such (it is still PTR_AND_OBJ).
7916     // The variable is initialized to false so that PTR_AND_OBJ entries which
7917     // are not struct members are not considered (e.g. array of pointers to
7918     // data).
7919     bool ShouldBeMemberOf = false;
7920 
7921     // Variable keeping track of whether or not we have encountered a component
7922     // in the component list which is a member expression. Useful when we have a
7923     // pointer or a final array section, in which case it is the previous
7924     // component in the list which tells us whether we have a member expression.
7925     // E.g. X.f[:]
7926     // While processing the final array section "[:]" it is "f" which tells us
7927     // whether we are dealing with a member of a declared struct.
7928     const MemberExpr *EncounteredME = nullptr;
7929 
7930     // Track for the total number of dimension. Start from one for the dummy
7931     // dimension.
7932     uint64_t DimSize = 1;
7933 
7934     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7935     bool IsPrevMemberReference = false;
7936 
7937     for (; I != CE; ++I) {
7938       // If the current component is member of a struct (parent struct) mark it.
7939       if (!EncounteredME) {
7940         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7941         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7942         // as MEMBER_OF the parent struct.
7943         if (EncounteredME) {
7944           ShouldBeMemberOf = true;
7945           // Do not emit as complex pointer if this is actually not array-like
7946           // expression.
7947           if (FirstPointerInComplexData) {
7948             QualType Ty = std::prev(I)
7949                               ->getAssociatedDeclaration()
7950                               ->getType()
7951                               .getNonReferenceType();
7952             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7953             FirstPointerInComplexData = false;
7954           }
7955         }
7956       }
7957 
7958       auto Next = std::next(I);
7959 
7960       // We need to generate the addresses and sizes if this is the last
7961       // component, if the component is a pointer or if it is an array section
7962       // whose length can't be proved to be one. If this is a pointer, it
7963       // becomes the base address for the following components.
7964 
7965       // A final array section, is one whose length can't be proved to be one.
7966       // If the map item is non-contiguous then we don't treat any array section
7967       // as final array section.
7968       bool IsFinalArraySection =
7969           !IsNonContiguous &&
7970           isFinalArraySectionExpression(I->getAssociatedExpression());
7971 
7972       // If we have a declaration for the mapping use that, otherwise use
7973       // the base declaration of the map clause.
7974       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7975                                      ? I->getAssociatedDeclaration()
7976                                      : BaseDecl;
7977       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7978                                                : MapExpr;
7979 
7980       // Get information on whether the element is a pointer. Have to do a
7981       // special treatment for array sections given that they are built-in
7982       // types.
7983       const auto *OASE =
7984           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7985       const auto *OAShE =
7986           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7987       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7988       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7989       bool IsPointer =
7990           OAShE ||
7991           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7992                        .getCanonicalType()
7993                        ->isAnyPointerType()) ||
7994           I->getAssociatedExpression()->getType()->isAnyPointerType();
7995       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7996                                MapDecl &&
7997                                MapDecl->getType()->isLValueReferenceType();
7998       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
7999 
8000       if (OASE)
8001         ++DimSize;
8002 
8003       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
8004           IsFinalArraySection) {
8005         // If this is not the last component, we expect the pointer to be
8006         // associated with an array expression or member expression.
8007         assert((Next == CE ||
8008                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
8009                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
8010                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
8011                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
8012                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
8013                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
8014                "Unexpected expression");
8015 
8016         Address LB = Address::invalid();
8017         Address LowestElem = Address::invalid();
8018         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
8019                                        const MemberExpr *E) {
8020           const Expr *BaseExpr = E->getBase();
8021           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
8022           // scalar.
8023           LValue BaseLV;
8024           if (E->isArrow()) {
8025             LValueBaseInfo BaseInfo;
8026             TBAAAccessInfo TBAAInfo;
8027             Address Addr =
8028                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
8029             QualType PtrTy = BaseExpr->getType()->getPointeeType();
8030             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
8031           } else {
8032             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
8033           }
8034           return BaseLV;
8035         };
8036         if (OAShE) {
8037           LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
8038                                     CGF.getContext().getTypeAlignInChars(
8039                                         OAShE->getBase()->getType()));
8040         } else if (IsMemberReference) {
8041           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
8042           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8043           LowestElem = CGF.EmitLValueForFieldInitialization(
8044                               BaseLVal, cast<FieldDecl>(MapDecl))
8045                            .getAddress(CGF);
8046           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
8047                    .getAddress(CGF);
8048         } else {
8049           LowestElem = LB =
8050               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
8051                   .getAddress(CGF);
8052         }
8053 
8054         // If this component is a pointer inside the base struct then we don't
8055         // need to create any entry for it - it will be combined with the object
8056         // it is pointing to into a single PTR_AND_OBJ entry.
8057         bool IsMemberPointerOrAddr =
8058             EncounteredME &&
8059             (((IsPointer || ForDeviceAddr) &&
8060               I->getAssociatedExpression() == EncounteredME) ||
8061              (IsPrevMemberReference && !IsPointer) ||
8062              (IsMemberReference && Next != CE &&
8063               !Next->getAssociatedExpression()->getType()->isPointerType()));
8064         if (!OverlappedElements.empty() && Next == CE) {
8065           // Handle base element with the info for overlapped elements.
8066           assert(!PartialStruct.Base.isValid() && "The base element is set.");
8067           assert(!IsPointer &&
8068                  "Unexpected base element with the pointer type.");
8069           // Mark the whole struct as the struct that requires allocation on the
8070           // device.
8071           PartialStruct.LowestElem = {0, LowestElem};
8072           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8073               I->getAssociatedExpression()->getType());
8074           Address HB = CGF.Builder.CreateConstGEP(
8075               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem,
8076                                                               CGF.VoidPtrTy),
8077               TypeSize.getQuantity() - 1);
8078           PartialStruct.HighestElem = {
8079               std::numeric_limits<decltype(
8080                   PartialStruct.HighestElem.first)>::max(),
8081               HB};
8082           PartialStruct.Base = BP;
8083           PartialStruct.LB = LB;
8084           assert(
8085               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8086               "Overlapped elements must be used only once for the variable.");
8087           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8088           // Emit data for non-overlapped data.
8089           OpenMPOffloadMappingFlags Flags =
8090               OMP_MAP_MEMBER_OF |
8091               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8092                              /*AddPtrFlag=*/false,
8093                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8094           llvm::Value *Size = nullptr;
8095           // Do bitcopy of all non-overlapped structure elements.
8096           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8097                    Component : OverlappedElements) {
8098             Address ComponentLB = Address::invalid();
8099             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8100                  Component) {
8101               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8102                 const auto *FD = dyn_cast<FieldDecl>(VD);
8103                 if (FD && FD->getType()->isLValueReferenceType()) {
8104                   const auto *ME =
8105                       cast<MemberExpr>(MC.getAssociatedExpression());
8106                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8107                   ComponentLB =
8108                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
8109                           .getAddress(CGF);
8110                 } else {
8111                   ComponentLB =
8112                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
8113                           .getAddress(CGF);
8114                 }
8115                 Size = CGF.Builder.CreatePtrDiff(
8116                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
8117                     CGF.EmitCastToVoidPtr(LB.getPointer()));
8118                 break;
8119               }
8120             }
8121             assert(Size && "Failed to determine structure size");
8122             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8123             CombinedInfo.BasePointers.push_back(BP.getPointer());
8124             CombinedInfo.Pointers.push_back(LB.getPointer());
8125             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8126                 Size, CGF.Int64Ty, /*isSigned=*/true));
8127             CombinedInfo.Types.push_back(Flags);
8128             CombinedInfo.Mappers.push_back(nullptr);
8129             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8130                                                                       : 1);
8131             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
8132           }
8133           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8134           CombinedInfo.BasePointers.push_back(BP.getPointer());
8135           CombinedInfo.Pointers.push_back(LB.getPointer());
8136           Size = CGF.Builder.CreatePtrDiff(
8137               CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
8138               CGF.EmitCastToVoidPtr(LB.getPointer()));
8139           CombinedInfo.Sizes.push_back(
8140               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8141           CombinedInfo.Types.push_back(Flags);
8142           CombinedInfo.Mappers.push_back(nullptr);
8143           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8144                                                                     : 1);
8145           break;
8146         }
8147         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8148         if (!IsMemberPointerOrAddr ||
8149             (Next == CE && MapType != OMPC_MAP_unknown)) {
8150           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8151           CombinedInfo.BasePointers.push_back(BP.getPointer());
8152           CombinedInfo.Pointers.push_back(LB.getPointer());
8153           CombinedInfo.Sizes.push_back(
8154               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8155           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8156                                                                     : 1);
8157 
8158           // If Mapper is valid, the last component inherits the mapper.
8159           bool HasMapper = Mapper && Next == CE;
8160           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8161 
8162           // We need to add a pointer flag for each map that comes from the
8163           // same expression except for the first one. We also need to signal
8164           // this map is the first one that relates with the current capture
8165           // (there is a set of entries for each capture).
8166           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8167               MapType, MapModifiers, MotionModifiers, IsImplicit,
8168               !IsExpressionFirstInfo || RequiresReference ||
8169                   FirstPointerInComplexData || IsMemberReference,
8170               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8171 
8172           if (!IsExpressionFirstInfo || IsMemberReference) {
8173             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8174             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8175             if (IsPointer || (IsMemberReference && Next != CE))
8176               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
8177                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
8178 
8179             if (ShouldBeMemberOf) {
8180               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8181               // should be later updated with the correct value of MEMBER_OF.
8182               Flags |= OMP_MAP_MEMBER_OF;
8183               // From now on, all subsequent PTR_AND_OBJ entries should not be
8184               // marked as MEMBER_OF.
8185               ShouldBeMemberOf = false;
8186             }
8187           }
8188 
8189           CombinedInfo.Types.push_back(Flags);
8190         }
8191 
8192         // If we have encountered a member expression so far, keep track of the
8193         // mapped member. If the parent is "*this", then the value declaration
8194         // is nullptr.
8195         if (EncounteredME) {
8196           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8197           unsigned FieldIndex = FD->getFieldIndex();
8198 
8199           // Update info about the lowest and highest elements for this struct
8200           if (!PartialStruct.Base.isValid()) {
8201             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8202             if (IsFinalArraySection) {
8203               Address HB =
8204                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
8205                       .getAddress(CGF);
8206               PartialStruct.HighestElem = {FieldIndex, HB};
8207             } else {
8208               PartialStruct.HighestElem = {FieldIndex, LowestElem};
8209             }
8210             PartialStruct.Base = BP;
8211             PartialStruct.LB = BP;
8212           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8213             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8214           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8215             PartialStruct.HighestElem = {FieldIndex, LowestElem};
8216           }
8217         }
8218 
8219         // Need to emit combined struct for array sections.
8220         if (IsFinalArraySection || IsNonContiguous)
8221           PartialStruct.IsArraySection = true;
8222 
8223         // If we have a final array section, we are done with this expression.
8224         if (IsFinalArraySection)
8225           break;
8226 
8227         // The pointer becomes the base for the next element.
8228         if (Next != CE)
8229           BP = IsMemberReference ? LowestElem : LB;
8230 
8231         IsExpressionFirstInfo = false;
8232         IsCaptureFirstInfo = false;
8233         FirstPointerInComplexData = false;
8234         IsPrevMemberReference = IsMemberReference;
8235       } else if (FirstPointerInComplexData) {
8236         QualType Ty = Components.rbegin()
8237                           ->getAssociatedDeclaration()
8238                           ->getType()
8239                           .getNonReferenceType();
8240         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8241         FirstPointerInComplexData = false;
8242       }
8243     }
8244     // If ran into the whole component - allocate the space for the whole
8245     // record.
8246     if (!EncounteredME)
8247       PartialStruct.HasCompleteRecord = true;
8248 
8249     if (!IsNonContiguous)
8250       return;
8251 
8252     const ASTContext &Context = CGF.getContext();
8253 
8254     // For supporting stride in array section, we need to initialize the first
8255     // dimension size as 1, first offset as 0, and first count as 1
8256     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8257     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8258     MapValuesArrayTy CurStrides;
8259     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8260     uint64_t ElementTypeSize;
8261 
8262     // Collect Size information for each dimension and get the element size as
8263     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
8264     // should be [10, 10] and the first stride is 4 btyes.
8265     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8266          Components) {
8267       const Expr *AssocExpr = Component.getAssociatedExpression();
8268       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8269 
8270       if (!OASE)
8271         continue;
8272 
8273       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8274       auto *CAT = Context.getAsConstantArrayType(Ty);
8275       auto *VAT = Context.getAsVariableArrayType(Ty);
8276 
8277       // We need all the dimension size except for the last dimension.
8278       assert((VAT || CAT || &Component == &*Components.begin()) &&
8279              "Should be either ConstantArray or VariableArray if not the "
8280              "first Component");
8281 
8282       // Get element size if CurStrides is empty.
8283       if (CurStrides.empty()) {
8284         const Type *ElementType = nullptr;
8285         if (CAT)
8286           ElementType = CAT->getElementType().getTypePtr();
8287         else if (VAT)
8288           ElementType = VAT->getElementType().getTypePtr();
8289         else
8290           assert(&Component == &*Components.begin() &&
8291                  "Only expect pointer (non CAT or VAT) when this is the "
8292                  "first Component");
8293         // If ElementType is null, then it means the base is a pointer
8294         // (neither CAT nor VAT) and we'll attempt to get ElementType again
8295         // for next iteration.
8296         if (ElementType) {
8297           // For the case that having pointer as base, we need to remove one
8298           // level of indirection.
8299           if (&Component != &*Components.begin())
8300             ElementType = ElementType->getPointeeOrArrayElementType();
8301           ElementTypeSize =
8302               Context.getTypeSizeInChars(ElementType).getQuantity();
8303           CurStrides.push_back(
8304               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8305         }
8306       }
8307       // Get dimension value except for the last dimension since we don't need
8308       // it.
8309       if (DimSizes.size() < Components.size() - 1) {
8310         if (CAT)
8311           DimSizes.push_back(llvm::ConstantInt::get(
8312               CGF.Int64Ty, CAT->getSize().getZExtValue()));
8313         else if (VAT)
8314           DimSizes.push_back(CGF.Builder.CreateIntCast(
8315               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8316               /*IsSigned=*/false));
8317       }
8318     }
8319 
8320     // Skip the dummy dimension since we have already have its information.
8321     auto DI = DimSizes.begin() + 1;
8322     // Product of dimension.
8323     llvm::Value *DimProd =
8324         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8325 
8326     // Collect info for non-contiguous. Notice that offset, count, and stride
8327     // are only meaningful for array-section, so we insert a null for anything
8328     // other than array-section.
8329     // Also, the size of offset, count, and stride are not the same as
8330     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8331     // count, and stride are the same as the number of non-contiguous
8332     // declaration in target update to/from clause.
8333     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8334          Components) {
8335       const Expr *AssocExpr = Component.getAssociatedExpression();
8336 
8337       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8338         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8339             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8340             /*isSigned=*/false);
8341         CurOffsets.push_back(Offset);
8342         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8343         CurStrides.push_back(CurStrides.back());
8344         continue;
8345       }
8346 
8347       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8348 
8349       if (!OASE)
8350         continue;
8351 
8352       // Offset
8353       const Expr *OffsetExpr = OASE->getLowerBound();
8354       llvm::Value *Offset = nullptr;
8355       if (!OffsetExpr) {
8356         // If offset is absent, then we just set it to zero.
8357         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8358       } else {
8359         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8360                                            CGF.Int64Ty,
8361                                            /*isSigned=*/false);
8362       }
8363       CurOffsets.push_back(Offset);
8364 
8365       // Count
8366       const Expr *CountExpr = OASE->getLength();
8367       llvm::Value *Count = nullptr;
8368       if (!CountExpr) {
8369         // In Clang, once a high dimension is an array section, we construct all
8370         // the lower dimension as array section, however, for case like
8371         // arr[0:2][2], Clang construct the inner dimension as an array section
8372         // but it actually is not in an array section form according to spec.
8373         if (!OASE->getColonLocFirst().isValid() &&
8374             !OASE->getColonLocSecond().isValid()) {
8375           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8376         } else {
8377           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8378           // When the length is absent it defaults to ⌈(size −
8379           // lower-bound)/stride⌉, where size is the size of the array
8380           // dimension.
8381           const Expr *StrideExpr = OASE->getStride();
8382           llvm::Value *Stride =
8383               StrideExpr
8384                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8385                                               CGF.Int64Ty, /*isSigned=*/false)
8386                   : nullptr;
8387           if (Stride)
8388             Count = CGF.Builder.CreateUDiv(
8389                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8390           else
8391             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8392         }
8393       } else {
8394         Count = CGF.EmitScalarExpr(CountExpr);
8395       }
8396       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8397       CurCounts.push_back(Count);
8398 
8399       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8400       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8401       //              Offset      Count     Stride
8402       //    D0          0           1         4    (int)    <- dummy dimension
8403       //    D1          0           2         8    (2 * (1) * 4)
8404       //    D2          1           2         20   (1 * (1 * 5) * 4)
8405       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8406       const Expr *StrideExpr = OASE->getStride();
8407       llvm::Value *Stride =
8408           StrideExpr
8409               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8410                                           CGF.Int64Ty, /*isSigned=*/false)
8411               : nullptr;
8412       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8413       if (Stride)
8414         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8415       else
8416         CurStrides.push_back(DimProd);
8417       if (DI != DimSizes.end())
8418         ++DI;
8419     }
8420 
8421     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8422     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8423     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8424   }
8425 
8426   /// Return the adjusted map modifiers if the declaration a capture refers to
8427   /// appears in a first-private clause. This is expected to be used only with
8428   /// directives that start with 'target'.
8429   MappableExprsHandler::OpenMPOffloadMappingFlags
8430   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8431     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8432 
8433     // A first private variable captured by reference will use only the
8434     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8435     // declaration is known as first-private in this handler.
8436     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8437       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8438         return MappableExprsHandler::OMP_MAP_TO |
8439                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8440       return MappableExprsHandler::OMP_MAP_PRIVATE |
8441              MappableExprsHandler::OMP_MAP_TO;
8442     }
8443     auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8444     if (I != LambdasMap.end())
8445       // for map(to: lambda): using user specified map type.
8446       return getMapTypeBits(
8447           I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8448           /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
8449           /*AddPtrFlag=*/false,
8450           /*AddIsTargetParamFlag=*/false,
8451           /*isNonContiguous=*/false);
8452     return MappableExprsHandler::OMP_MAP_TO |
8453            MappableExprsHandler::OMP_MAP_FROM;
8454   }
8455 
8456   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8457     // Rotate by getFlagMemberOffset() bits.
8458     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8459                                                   << getFlagMemberOffset());
8460   }
8461 
8462   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8463                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8464     // If the entry is PTR_AND_OBJ but has not been marked with the special
8465     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8466     // marked as MEMBER_OF.
8467     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8468         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8469       return;
8470 
8471     // Reset the placeholder value to prepare the flag for the assignment of the
8472     // proper MEMBER_OF value.
8473     Flags &= ~OMP_MAP_MEMBER_OF;
8474     Flags |= MemberOfFlag;
8475   }
8476 
  /// Collect into \p Layout, in LLVM-struct-layout order, all fields of
  /// \p RD including those of its (non-empty) base classes, recursing into
  /// each base's own layout. Bitfields and zero-size fields are skipped.
  /// \p AsBase selects the base-subobject LLVM type instead of the complete
  /// object type when laying out \p RD itself.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    // One slot per LLVM struct element; each slot ends up holding either a
    // base class or a field (or stays null for padding/bitfield storage).
    unsigned NumElements = St->getNumElements();
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      // Skip slots already claimed by a non-virtual base above.
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Emit the collected entries in layout order: recurse into each base as a
    // base subobject, and append plain fields directly.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }
8536 
8537   /// Generate all the base pointers, section pointers, sizes, map types, and
8538   /// mappers for the extracted mappable expressions (all included in \a
8539   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8540   /// pair of the relevant declaration and index where it occurs is appended to
8541   /// the device pointers info array.
8542   void generateAllInfoForClauses(
8543       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8544       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8545           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8546     // We have to process the component lists that relate with the same
8547     // declaration in a single chunk so that we can generate the map flags
8548     // correctly. Therefore, we organize all lists in a map.
8549     enum MapKind { Present, Allocs, Other, Total };
8550     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8551                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8552         Info;
8553 
8554     // Helper function to fill the information map for the different supported
8555     // clauses.
8556     auto &&InfoGen =
8557         [&Info, &SkipVarSet](
8558             const ValueDecl *D, MapKind Kind,
8559             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8560             OpenMPMapClauseKind MapType,
8561             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8562             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8563             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8564             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8565           if (SkipVarSet.contains(D))
8566             return;
8567           auto It = Info.find(D);
8568           if (It == Info.end())
8569             It = Info
8570                      .insert(std::make_pair(
8571                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8572                      .first;
8573           It->second[Kind].emplace_back(
8574               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8575               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8576         };
8577 
8578     for (const auto *Cl : Clauses) {
8579       const auto *C = dyn_cast<OMPMapClause>(Cl);
8580       if (!C)
8581         continue;
8582       MapKind Kind = Other;
8583       if (llvm::is_contained(C->getMapTypeModifiers(),
8584                              OMPC_MAP_MODIFIER_present))
8585         Kind = Present;
8586       else if (C->getMapType() == OMPC_MAP_alloc)
8587         Kind = Allocs;
8588       const auto *EI = C->getVarRefs().begin();
8589       for (const auto L : C->component_lists()) {
8590         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8591         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8592                 C->getMapTypeModifiers(), llvm::None,
8593                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8594                 E);
8595         ++EI;
8596       }
8597     }
8598     for (const auto *Cl : Clauses) {
8599       const auto *C = dyn_cast<OMPToClause>(Cl);
8600       if (!C)
8601         continue;
8602       MapKind Kind = Other;
8603       if (llvm::is_contained(C->getMotionModifiers(),
8604                              OMPC_MOTION_MODIFIER_present))
8605         Kind = Present;
8606       const auto *EI = C->getVarRefs().begin();
8607       for (const auto L : C->component_lists()) {
8608         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8609                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8610                 C->isImplicit(), std::get<2>(L), *EI);
8611         ++EI;
8612       }
8613     }
8614     for (const auto *Cl : Clauses) {
8615       const auto *C = dyn_cast<OMPFromClause>(Cl);
8616       if (!C)
8617         continue;
8618       MapKind Kind = Other;
8619       if (llvm::is_contained(C->getMotionModifiers(),
8620                              OMPC_MOTION_MODIFIER_present))
8621         Kind = Present;
8622       const auto *EI = C->getVarRefs().begin();
8623       for (const auto L : C->component_lists()) {
8624         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8625                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8626                 C->isImplicit(), std::get<2>(L), *EI);
8627         ++EI;
8628       }
8629     }
8630 
8631     // Look at the use_device_ptr clause information and mark the existing map
8632     // entries as such. If there is no map information for an entry in the
8633     // use_device_ptr list, we create one with map type 'alloc' and zero size
8634     // section. It is the user fault if that was not mapped before. If there is
8635     // no map information and the pointer is a struct member, then we defer the
8636     // emission of that entry until the whole struct has been processed.
8637     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8638                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8639         DeferredInfo;
8640     MapCombinedInfoTy UseDevicePtrCombinedInfo;
8641 
8642     for (const auto *Cl : Clauses) {
8643       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8644       if (!C)
8645         continue;
8646       for (const auto L : C->component_lists()) {
8647         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8648             std::get<1>(L);
8649         assert(!Components.empty() &&
8650                "Not expecting empty list of components!");
8651         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8652         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8653         const Expr *IE = Components.back().getAssociatedExpression();
8654         // If the first component is a member expression, we have to look into
8655         // 'this', which maps to null in the map of map information. Otherwise
8656         // look directly for the information.
8657         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8658 
8659         // We potentially have map information for this declaration already.
8660         // Look for the first set of components that refer to it.
8661         if (It != Info.end()) {
8662           bool Found = false;
8663           for (auto &Data : It->second) {
8664             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8665               return MI.Components.back().getAssociatedDeclaration() == VD;
8666             });
8667             // If we found a map entry, signal that the pointer has to be
8668             // returned and move on to the next declaration. Exclude cases where
8669             // the base pointer is mapped as array subscript, array section or
8670             // array shaping. The base address is passed as a pointer to base in
8671             // this case and cannot be used as a base for use_device_ptr list
8672             // item.
8673             if (CI != Data.end()) {
8674               auto PrevCI = std::next(CI->Components.rbegin());
8675               const auto *VarD = dyn_cast<VarDecl>(VD);
8676               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8677                   isa<MemberExpr>(IE) ||
8678                   !VD->getType().getNonReferenceType()->isPointerType() ||
8679                   PrevCI == CI->Components.rend() ||
8680                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8681                   VarD->hasLocalStorage()) {
8682                 CI->ReturnDevicePointer = true;
8683                 Found = true;
8684                 break;
8685               }
8686             }
8687           }
8688           if (Found)
8689             continue;
8690         }
8691 
8692         // We didn't find any match in our map information - generate a zero
8693         // size array section - if the pointer is a struct member we defer this
8694         // action until the whole struct has been processed.
8695         if (isa<MemberExpr>(IE)) {
8696           // Insert the pointer into Info to be processed by
8697           // generateInfoForComponentList. Because it is a member pointer
8698           // without a pointee, no entry will be generated for it, therefore
8699           // we need to generate one after the whole struct has been processed.
8700           // Nonetheless, generateInfoForComponentList must be called to take
8701           // the pointer into account for the calculation of the range of the
8702           // partial struct.
8703           InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8704                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8705                   nullptr);
8706           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8707         } else {
8708           llvm::Value *Ptr =
8709               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8710           UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8711           UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8712           UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8713           UseDevicePtrCombinedInfo.Sizes.push_back(
8714               llvm::Constant::getNullValue(CGF.Int64Ty));
8715           UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8716           UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8717         }
8718       }
8719     }
8720 
8721     // Look at the use_device_addr clause information and mark the existing map
8722     // entries as such. If there is no map information for an entry in the
8723     // use_device_addr list, we create one with map type 'alloc' and zero size
8724     // section. It is the user fault if that was not mapped before. If there is
8725     // no map information and the pointer is a struct member, then we defer the
8726     // emission of that entry until the whole struct has been processed.
8727     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8728     for (const auto *Cl : Clauses) {
8729       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8730       if (!C)
8731         continue;
8732       for (const auto L : C->component_lists()) {
8733         assert(!std::get<1>(L).empty() &&
8734                "Not expecting empty list of components!");
8735         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8736         if (!Processed.insert(VD).second)
8737           continue;
8738         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8739         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8740         // If the first component is a member expression, we have to look into
8741         // 'this', which maps to null in the map of map information. Otherwise
8742         // look directly for the information.
8743         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8744 
8745         // We potentially have map information for this declaration already.
8746         // Look for the first set of components that refer to it.
8747         if (It != Info.end()) {
8748           bool Found = false;
8749           for (auto &Data : It->second) {
8750             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8751               return MI.Components.back().getAssociatedDeclaration() == VD;
8752             });
8753             // If we found a map entry, signal that the pointer has to be
8754             // returned and move on to the next declaration.
8755             if (CI != Data.end()) {
8756               CI->ReturnDevicePointer = true;
8757               Found = true;
8758               break;
8759             }
8760           }
8761           if (Found)
8762             continue;
8763         }
8764 
8765         // We didn't find any match in our map information - generate a zero
8766         // size array section - if the pointer is a struct member we defer this
8767         // action until the whole struct has been processed.
8768         if (isa<MemberExpr>(IE)) {
8769           // Insert the pointer into Info to be processed by
8770           // generateInfoForComponentList. Because it is a member pointer
8771           // without a pointee, no entry will be generated for it, therefore
8772           // we need to generate one after the whole struct has been processed.
8773           // Nonetheless, generateInfoForComponentList must be called to take
8774           // the pointer into account for the calculation of the range of the
8775           // partial struct.
8776           InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8777                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8778                   nullptr, nullptr, /*ForDeviceAddr=*/true);
8779           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8780         } else {
8781           llvm::Value *Ptr;
8782           if (IE->isGLValue())
8783             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8784           else
8785             Ptr = CGF.EmitScalarExpr(IE);
8786           CombinedInfo.Exprs.push_back(VD);
8787           CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8788           CombinedInfo.Pointers.push_back(Ptr);
8789           CombinedInfo.Sizes.push_back(
8790               llvm::Constant::getNullValue(CGF.Int64Ty));
8791           CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8792           CombinedInfo.Mappers.push_back(nullptr);
8793         }
8794       }
8795     }
8796 
8797     for (const auto &Data : Info) {
8798       StructRangeInfoTy PartialStruct;
8799       // Temporary generated information.
8800       MapCombinedInfoTy CurInfo;
8801       const Decl *D = Data.first;
8802       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8803       for (const auto &M : Data.second) {
8804         for (const MapInfo &L : M) {
8805           assert(!L.Components.empty() &&
8806                  "Not expecting declaration with no component lists.");
8807 
8808           // Remember the current base pointer index.
8809           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8810           CurInfo.NonContigInfo.IsNonContiguous =
8811               L.Components.back().isNonContiguous();
8812           generateInfoForComponentList(
8813               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8814               CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8815               L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8816 
8817           // If this entry relates with a device pointer, set the relevant
8818           // declaration and add the 'return pointer' flag.
8819           if (L.ReturnDevicePointer) {
8820             assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8821                    "Unexpected number of mapped base pointers.");
8822 
8823             const ValueDecl *RelevantVD =
8824                 L.Components.back().getAssociatedDeclaration();
8825             assert(RelevantVD &&
8826                    "No relevant declaration related with device pointer??");
8827 
8828             CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8829                 RelevantVD);
8830             CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8831           }
8832         }
8833       }
8834 
8835       // Append any pending zero-length pointers which are struct members and
8836       // used with use_device_ptr or use_device_addr.
8837       auto CI = DeferredInfo.find(Data.first);
8838       if (CI != DeferredInfo.end()) {
8839         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8840           llvm::Value *BasePtr;
8841           llvm::Value *Ptr;
8842           if (L.ForDeviceAddr) {
8843             if (L.IE->isGLValue())
8844               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8845             else
8846               Ptr = this->CGF.EmitScalarExpr(L.IE);
8847             BasePtr = Ptr;
8848             // Entry is RETURN_PARAM. Also, set the placeholder value
8849             // MEMBER_OF=FFFF so that the entry is later updated with the
8850             // correct value of MEMBER_OF.
8851             CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8852           } else {
8853             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8854             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8855                                              L.IE->getExprLoc());
8856             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8857             // placeholder value MEMBER_OF=FFFF so that the entry is later
8858             // updated with the correct value of MEMBER_OF.
8859             CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8860                                     OMP_MAP_MEMBER_OF);
8861           }
8862           CurInfo.Exprs.push_back(L.VD);
8863           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8864           CurInfo.Pointers.push_back(Ptr);
8865           CurInfo.Sizes.push_back(
8866               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8867           CurInfo.Mappers.push_back(nullptr);
8868         }
8869       }
8870       // If there is an entry in PartialStruct it means we have a struct with
8871       // individual members mapped. Emit an extra combined entry.
8872       if (PartialStruct.Base.isValid()) {
8873         CurInfo.NonContigInfo.Dims.push_back(0);
8874         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8875       }
8876 
8877       // We need to append the results of this capture to what we already
8878       // have.
8879       CombinedInfo.append(CurInfo);
8880     }
8881     // Append data for use_device_ptr clauses.
8882     CombinedInfo.append(UseDevicePtrCombinedInfo);
8883   }
8884 
8885 public:
8886   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8887       : CurDir(&Dir), CGF(CGF) {
8888     // Extract firstprivate clause information.
8889     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8890       for (const auto *D : C->varlists())
8891         FirstPrivateDecls.try_emplace(
8892             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8893     // Extract implicit firstprivates from uses_allocators clauses.
8894     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8895       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8896         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8897         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8898           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8899                                         /*Implicit=*/true);
8900         else if (const auto *VD = dyn_cast<VarDecl>(
8901                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8902                          ->getDecl()))
8903           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8904       }
8905     }
8906     // Extract device pointer clause information.
8907     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8908       for (auto L : C->component_lists())
8909         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8910     // Extract map information.
8911     for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8912       if (C->getMapType() != OMPC_MAP_to)
8913         continue;
8914       for (auto L : C->component_lists()) {
8915         const ValueDecl *VD = std::get<0>(L);
8916         const auto *RD = VD ? VD->getType()
8917                                   .getCanonicalType()
8918                                   .getNonReferenceType()
8919                                   ->getAsCXXRecordDecl()
8920                             : nullptr;
8921         if (RD && RD->isLambda())
8922           LambdasMap.try_emplace(std::get<0>(L), C);
8923       }
8924     }
8925   }
8926 
  /// Constructor for the declare mapper directive. Only the directive itself
  /// is recorded; no clause pre-processing (firstprivate, is_device_ptr,
  /// lambda maps) is required in this mode.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8930 
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  ///
  /// \param CombinedInfo Mapping information collected so far; the combined
  ///        entry is appended to it.
  /// \param CurTypes Map flags of the entries generated for the individual
  ///        members of the current struct; updated in place so that they
  ///        become MEMBER_OF the new combined entry.
  /// \param PartialStruct Base address and lowest/highest mapped element of
  ///        the partially mapped struct.
  /// \param VD The declaration the combined entry relates to, if any.
  /// \param NotTargetParams If true, do not flag the combined entry as a
  ///        target parameter.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    // A single entry that is not MEMBER_OF anything and not an array section
    // does not need a combined parent entry.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    // When the complete record is mapped, both bounds collapse to the
    // record's own begin address.
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr =
        CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement.  Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_OMPX_HOLD;
        })) {
      CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }
9002 
9003   /// Generate all the base pointers, section pointers, sizes, map types, and
9004   /// mappers for the extracted mappable expressions (all included in \a
9005   /// CombinedInfo). Also, for each item that relates with a device pointer, a
9006   /// pair of the relevant declaration and index where it occurs is appended to
9007   /// the device pointers info array.
9008   void generateAllInfo(
9009       MapCombinedInfoTy &CombinedInfo,
9010       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9011           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9012     assert(CurDir.is<const OMPExecutableDirective *>() &&
9013            "Expect a executable directive");
9014     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9015     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
9016   }
9017 
9018   /// Generate all the base pointers, section pointers, sizes, map types, and
9019   /// mappers for the extracted map clauses of user-defined mapper (all included
9020   /// in \a CombinedInfo).
9021   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
9022     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
9023            "Expect a declare mapper directive");
9024     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
9025     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
9026   }
9027 
9028   /// Emit capture info for lambdas for variables captured by reference.
9029   void generateInfoForLambdaCaptures(
9030       const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9031       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
9032     const auto *RD = VD->getType()
9033                          .getCanonicalType()
9034                          .getNonReferenceType()
9035                          ->getAsCXXRecordDecl();
9036     if (!RD || !RD->isLambda())
9037       return;
9038     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
9039     LValue VDLVal = CGF.MakeAddrLValue(
9040         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
9041     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
9042     FieldDecl *ThisCapture = nullptr;
9043     RD->getCaptureFields(Captures, ThisCapture);
9044     if (ThisCapture) {
9045       LValue ThisLVal =
9046           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
9047       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
9048       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
9049                                  VDLVal.getPointer(CGF));
9050       CombinedInfo.Exprs.push_back(VD);
9051       CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
9052       CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
9053       CombinedInfo.Sizes.push_back(
9054           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
9055                                     CGF.Int64Ty, /*isSigned=*/true));
9056       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9057                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9058       CombinedInfo.Mappers.push_back(nullptr);
9059     }
9060     for (const LambdaCapture &LC : RD->captures()) {
9061       if (!LC.capturesVariable())
9062         continue;
9063       const VarDecl *VD = LC.getCapturedVar();
9064       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
9065         continue;
9066       auto It = Captures.find(VD);
9067       assert(It != Captures.end() && "Found lambda capture without field.");
9068       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
9069       if (LC.getCaptureKind() == LCK_ByRef) {
9070         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
9071         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9072                                    VDLVal.getPointer(CGF));
9073         CombinedInfo.Exprs.push_back(VD);
9074         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9075         CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
9076         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9077             CGF.getTypeSize(
9078                 VD->getType().getCanonicalType().getNonReferenceType()),
9079             CGF.Int64Ty, /*isSigned=*/true));
9080       } else {
9081         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
9082         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9083                                    VDLVal.getPointer(CGF));
9084         CombinedInfo.Exprs.push_back(VD);
9085         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9086         CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
9087         CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
9088       }
9089       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9090                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9091       CombinedInfo.Mappers.push_back(nullptr);
9092     }
9093   }
9094 
9095   /// Set correct indices for lambdas captures.
9096   void adjustMemberOfForLambdaCaptures(
9097       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9098       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9099       MapFlagsArrayTy &Types) const {
9100     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9101       // Set correct member_of idx for all implicit lambda captures.
9102       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9103                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9104         continue;
9105       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9106       assert(BasePtr && "Unable to find base lambda address.");
9107       int TgtIdx = -1;
9108       for (unsigned J = I; J > 0; --J) {
9109         unsigned Idx = J - 1;
9110         if (Pointers[Idx] != BasePtr)
9111           continue;
9112         TgtIdx = Idx;
9113         break;
9114       }
9115       assert(TgtIdx != -1 && "Unable to find parent lambda.");
9116       // All other current entries will be MEMBER_OF the combined entry
9117       // (except for PTR_AND_OBJ entries which do not have a placeholder value
9118       // 0xFFFF in the MEMBER_OF field).
9119       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9120       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9121     }
9122   }
9123 
9124   /// Generate the base pointers, section pointers, sizes, map types, and
9125   /// mappers associated to a given capture (all included in \a CombinedInfo).
9126   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
9127                               llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9128                               StructRangeInfoTy &PartialStruct) const {
9129     assert(!Cap->capturesVariableArrayType() &&
9130            "Not expecting to generate map info for a variable array type!");
9131 
9132     // We need to know when we generating information for the first component
9133     const ValueDecl *VD = Cap->capturesThis()
9134                               ? nullptr
9135                               : Cap->getCapturedVar()->getCanonicalDecl();
9136 
9137     // for map(to: lambda): skip here, processing it in
9138     // generateDefaultMapInfo
9139     if (LambdasMap.count(VD))
9140       return;
9141 
9142     // If this declaration appears in a is_device_ptr clause we just have to
9143     // pass the pointer by value. If it is a reference to a declaration, we just
9144     // pass its value.
9145     if (DevPointersMap.count(VD)) {
9146       CombinedInfo.Exprs.push_back(VD);
9147       CombinedInfo.BasePointers.emplace_back(Arg, VD);
9148       CombinedInfo.Pointers.push_back(Arg);
9149       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9150           CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9151           /*isSigned=*/true));
9152       CombinedInfo.Types.push_back(
9153           (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
9154           OMP_MAP_TARGET_PARAM);
9155       CombinedInfo.Mappers.push_back(nullptr);
9156       return;
9157     }
9158 
9159     using MapData =
9160         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
9161                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
9162                    const ValueDecl *, const Expr *>;
9163     SmallVector<MapData, 4> DeclComponentLists;
9164     assert(CurDir.is<const OMPExecutableDirective *>() &&
9165            "Expect a executable directive");
9166     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9167     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9168       const auto *EI = C->getVarRefs().begin();
9169       for (const auto L : C->decl_component_lists(VD)) {
9170         const ValueDecl *VDecl, *Mapper;
9171         // The Expression is not correct if the mapping is implicit
9172         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9173         OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9174         std::tie(VDecl, Components, Mapper) = L;
9175         assert(VDecl == VD && "We got information for the wrong declaration??");
9176         assert(!Components.empty() &&
9177                "Not expecting declaration with no component lists.");
9178         DeclComponentLists.emplace_back(Components, C->getMapType(),
9179                                         C->getMapTypeModifiers(),
9180                                         C->isImplicit(), Mapper, E);
9181         ++EI;
9182       }
9183     }
9184     llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9185                                              const MapData &RHS) {
9186       ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9187       OpenMPMapClauseKind MapType = std::get<1>(RHS);
9188       bool HasPresent =
9189           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9190       bool HasAllocs = MapType == OMPC_MAP_alloc;
9191       MapModifiers = std::get<2>(RHS);
9192       MapType = std::get<1>(LHS);
9193       bool HasPresentR =
9194           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9195       bool HasAllocsR = MapType == OMPC_MAP_alloc;
9196       return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9197     });
9198 
9199     // Find overlapping elements (including the offset from the base element).
9200     llvm::SmallDenseMap<
9201         const MapData *,
9202         llvm::SmallVector<
9203             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9204         4>
9205         OverlappedData;
9206     size_t Count = 0;
9207     for (const MapData &L : DeclComponentLists) {
9208       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9209       OpenMPMapClauseKind MapType;
9210       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9211       bool IsImplicit;
9212       const ValueDecl *Mapper;
9213       const Expr *VarRef;
9214       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9215           L;
9216       ++Count;
9217       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
9218         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9219         std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9220                  VarRef) = L1;
9221         auto CI = Components.rbegin();
9222         auto CE = Components.rend();
9223         auto SI = Components1.rbegin();
9224         auto SE = Components1.rend();
9225         for (; CI != CE && SI != SE; ++CI, ++SI) {
9226           if (CI->getAssociatedExpression()->getStmtClass() !=
9227               SI->getAssociatedExpression()->getStmtClass())
9228             break;
9229           // Are we dealing with different variables/fields?
9230           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9231             break;
9232         }
9233         // Found overlapping if, at least for one component, reached the head
9234         // of the components list.
9235         if (CI == CE || SI == SE) {
9236           // Ignore it if it is the same component.
9237           if (CI == CE && SI == SE)
9238             continue;
9239           const auto It = (SI == SE) ? CI : SI;
9240           // If one component is a pointer and another one is a kind of
9241           // dereference of this pointer (array subscript, section, dereference,
9242           // etc.), it is not an overlapping.
9243           // Same, if one component is a base and another component is a
9244           // dereferenced pointer memberexpr with the same base.
9245           if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9246               (std::prev(It)->getAssociatedDeclaration() &&
9247                std::prev(It)
9248                    ->getAssociatedDeclaration()
9249                    ->getType()
9250                    ->isPointerType()) ||
9251               (It->getAssociatedDeclaration() &&
9252                It->getAssociatedDeclaration()->getType()->isPointerType() &&
9253                std::next(It) != CE && std::next(It) != SE))
9254             continue;
9255           const MapData &BaseData = CI == CE ? L : L1;
9256           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9257               SI == SE ? Components : Components1;
9258           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
9259           OverlappedElements.getSecond().push_back(SubData);
9260         }
9261       }
9262     }
9263     // Sort the overlapped elements for each item.
9264     llvm::SmallVector<const FieldDecl *, 4> Layout;
9265     if (!OverlappedData.empty()) {
9266       const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9267       const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9268       while (BaseType != OrigType) {
9269         BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9270         OrigType = BaseType->getPointeeOrArrayElementType();
9271       }
9272 
9273       if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9274         getPlainLayout(CRD, Layout, /*AsBase=*/false);
9275       else {
9276         const auto *RD = BaseType->getAsRecordDecl();
9277         Layout.append(RD->field_begin(), RD->field_end());
9278       }
9279     }
9280     for (auto &Pair : OverlappedData) {
9281       llvm::stable_sort(
9282           Pair.getSecond(),
9283           [&Layout](
9284               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9285               OMPClauseMappableExprCommon::MappableExprComponentListRef
9286                   Second) {
9287             auto CI = First.rbegin();
9288             auto CE = First.rend();
9289             auto SI = Second.rbegin();
9290             auto SE = Second.rend();
9291             for (; CI != CE && SI != SE; ++CI, ++SI) {
9292               if (CI->getAssociatedExpression()->getStmtClass() !=
9293                   SI->getAssociatedExpression()->getStmtClass())
9294                 break;
9295               // Are we dealing with different variables/fields?
9296               if (CI->getAssociatedDeclaration() !=
9297                   SI->getAssociatedDeclaration())
9298                 break;
9299             }
9300 
9301             // Lists contain the same elements.
9302             if (CI == CE && SI == SE)
9303               return false;
9304 
9305             // List with less elements is less than list with more elements.
9306             if (CI == CE || SI == SE)
9307               return CI == CE;
9308 
9309             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9310             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9311             if (FD1->getParent() == FD2->getParent())
9312               return FD1->getFieldIndex() < FD2->getFieldIndex();
9313             const auto *It =
9314                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9315                   return FD == FD1 || FD == FD2;
9316                 });
9317             return *It == FD1;
9318           });
9319     }
9320 
9321     // Associated with a capture, because the mapping flags depend on it.
9322     // Go through all of the elements with the overlapped elements.
9323     bool IsFirstComponentList = true;
9324     for (const auto &Pair : OverlappedData) {
9325       const MapData &L = *Pair.getFirst();
9326       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9327       OpenMPMapClauseKind MapType;
9328       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9329       bool IsImplicit;
9330       const ValueDecl *Mapper;
9331       const Expr *VarRef;
9332       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9333           L;
9334       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9335           OverlappedComponents = Pair.getSecond();
9336       generateInfoForComponentList(
9337           MapType, MapModifiers, llvm::None, Components, CombinedInfo,
9338           PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
9339           /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
9340       IsFirstComponentList = false;
9341     }
9342     // Go through other elements without overlapped elements.
9343     for (const MapData &L : DeclComponentLists) {
9344       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9345       OpenMPMapClauseKind MapType;
9346       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9347       bool IsImplicit;
9348       const ValueDecl *Mapper;
9349       const Expr *VarRef;
9350       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9351           L;
9352       auto It = OverlappedData.find(&L);
9353       if (It == OverlappedData.end())
9354         generateInfoForComponentList(MapType, MapModifiers, llvm::None,
9355                                      Components, CombinedInfo, PartialStruct,
9356                                      IsFirstComponentList, IsImplicit, Mapper,
9357                                      /*ForDeviceAddr=*/false, VD, VarRef);
9358       IsFirstComponentList = false;
9359     }
9360   }
9361 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Appends exactly one entry to each of CombinedInfo's parallel arrays
  /// (Exprs, BasePointers, Pointers, Sizes, Types, Mappers), keeping them
  /// index-aligned with each other.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // Capture of 'this': map the pointed-to object 'tofrom' by default.
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      // The size is that of the pointee object, not of the pointer itself.
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // For firstprivate captures, reuse the implicit/explicit flag recorded
      // when the clauses were processed.
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      // Size of the referenced element, not of the reference.
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        // Firstprivate pointer captured by reference: pass the pointer value
        // loaded through the reference instead of the reference itself.
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
9434 };
9435 } // anonymous namespace
9436 
/// Emit the descriptor_dim arrays describing non-contiguous mapped regions
/// and store their addresses into the offload pointers array, one entry per
/// non-contiguous base declaration.
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //  uint64_t offset;
  //  uint64_t count;
  //  uint64_t stride
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  // Three uint64_t fields: offset, count, stride (layout shown above).
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  // Indices of the three fields within descriptor_dim.
  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variable here since the size of "Dims" is the same as the
  // size of Components, however, the size of offset, count, and stride is equal
  // to the size of base declaration that is non-contiguous.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting ir if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    // Allocate a local array "dims" of Dims[I] descriptor_dim records.
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      // RevIdx walks the recorded Offsets/Counts/Strides back-to-front while
      // II fills the emitted array front-to-back.
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr(P, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    // Advance the per-base-declaration index only when a descriptor was
    // actually emitted.
    ++L;
  }
}
9504 
9505 // Try to extract the base declaration from a `this->x` expression if possible.
9506 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9507   if (!E)
9508     return nullptr;
9509 
9510   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
9511     if (const MemberExpr *ME =
9512             dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9513       return ME->getMemberDecl();
9514   return nullptr;
9515 }
9516 
9517 /// Emit a string constant containing the names of the values mapped to the
9518 /// offloading runtime library.
9519 llvm::Constant *
9520 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9521                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9522 
9523   if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9524     return OMPBuilder.getOrCreateDefaultSrcLocStr();
9525 
9526   SourceLocation Loc;
9527   if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9528     if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9529       Loc = VD->getLocation();
9530     else
9531       Loc = MapExprs.getMapExpr()->getExprLoc();
9532   } else {
9533     Loc = MapExprs.getMapDecl()->getLocation();
9534   }
9535 
9536   std::string ExprName = "";
9537   if (MapExprs.getMapExpr()) {
9538     PrintingPolicy P(CGF.getContext().getLangOpts());
9539     llvm::raw_string_ostream OS(ExprName);
9540     MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9541     OS.flush();
9542   } else {
9543     ExprName = MapExprs.getMapDecl()->getNameAsString();
9544   }
9545 
9546   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9547   return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName.c_str(),
9548                                          PLoc.getLine(), PLoc.getColumn());
9549 }
9550 
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// Populates Info with the base-pointers, pointers, sizes, map-types,
/// map-names and mappers arrays, then fills the runtime-valued entries with
/// stores. Sizes become a private constant global when all sizes are compile
/// time constants; map types and (optionally) map names are always constant
/// globals.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : CombinedInfo.Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // Stack temporaries for the per-argument base pointers, pointers and
    // mapper functions; filled in the loop below.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
        // For non-contiguous entries the "size" slot carries the dimension
        // count instead of a byte size.
        if (IsNonContiguous &&
            (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
          ConstSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
        } else {
          ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
        }
      }

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      // Build one source-location/name string per mapped expression.
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Fill the base-pointer, pointer, (runtime) size and mapper slots for
    // every mapped argument.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record the slot address for device-pointer captures so the region can
      // privatize them later.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Sizes are only stored at runtime when at least one of them is not a
      // compile-time constant (otherwise the constant global above is used).
      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  // Emit the per-dimension descriptors only when non-contiguous data was
  // actually recorded.
  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}
9725 
namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  // When true, emit the map-types array variant for the end of the region
  // (Info.MapTypesArrayEnd) rather than the beginning.
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  // NOTE(review): intentionally non-explicit, presumably so call sites can
  // brace-initialize from a bool — confirm before marking explicit.
  ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
};
} // namespace
9734 
/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers.  If
/// ForEndCall, emit map types to be passed for the end of the region instead of
/// the beginning.
///
/// All six output pointers are always assigned: either decayed pointers into
/// the arrays recorded in \p Info, or typed null constants when there are no
/// mapped pointers (or the corresponding array is unavailable).
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
    llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
    const ArgumentsOptions &Options = ArgumentsOptions()) {
  assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
         "expected region end call to runtime only when end call is separate");
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    // Decay each [N x T] array to a pointer to its first element.
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    // For the end of the region, use the alternate map-types array when one
    // was emitted (i.e. when 'present' modifiers had to be dropped).
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
                                                    : Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);

    // Only emit the mapper information arrays if debug information is
    // requested.
    if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
      MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.MapNamesArray,
          /*Idx0=*/0,
          /*Idx1=*/0);
    // If there is no user-defined mapper, set the mapper array to nullptr to
    // avoid an unnecessary data privatization
    if (!Info.HasMapper)
      MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MappersArrayArg =
          CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
  } else {
    // No mapped pointers: pass typed null pointers for every argument.
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
  }
}
9795 
9796 /// Check for inner distribute directive.
9797 static const OMPExecutableDirective *
9798 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9799   const auto *CS = D.getInnermostCapturedStmt();
9800   const auto *Body =
9801       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9802   const Stmt *ChildStmt =
9803       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9804 
9805   if (const auto *NestedDir =
9806           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9807     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9808     switch (D.getDirectiveKind()) {
9809     case OMPD_target:
9810       if (isOpenMPDistributeDirective(DKind))
9811         return NestedDir;
9812       if (DKind == OMPD_teams) {
9813         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9814             /*IgnoreCaptured=*/true);
9815         if (!Body)
9816           return nullptr;
9817         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9818         if (const auto *NND =
9819                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9820           DKind = NND->getDirectiveKind();
9821           if (isOpenMPDistributeDirective(DKind))
9822             return NND;
9823         }
9824       }
9825       return nullptr;
9826     case OMPD_target_teams:
9827       if (isOpenMPDistributeDirective(DKind))
9828         return NestedDir;
9829       return nullptr;
9830     case OMPD_target_parallel:
9831     case OMPD_target_simd:
9832     case OMPD_target_parallel_for:
9833     case OMPD_target_parallel_for_simd:
9834       return nullptr;
9835     case OMPD_target_teams_distribute:
9836     case OMPD_target_teams_distribute_simd:
9837     case OMPD_target_teams_distribute_parallel_for:
9838     case OMPD_target_teams_distribute_parallel_for_simd:
9839     case OMPD_parallel:
9840     case OMPD_for:
9841     case OMPD_parallel_for:
9842     case OMPD_parallel_master:
9843     case OMPD_parallel_sections:
9844     case OMPD_for_simd:
9845     case OMPD_parallel_for_simd:
9846     case OMPD_cancel:
9847     case OMPD_cancellation_point:
9848     case OMPD_ordered:
9849     case OMPD_threadprivate:
9850     case OMPD_allocate:
9851     case OMPD_task:
9852     case OMPD_simd:
9853     case OMPD_tile:
9854     case OMPD_unroll:
9855     case OMPD_sections:
9856     case OMPD_section:
9857     case OMPD_single:
9858     case OMPD_master:
9859     case OMPD_critical:
9860     case OMPD_taskyield:
9861     case OMPD_barrier:
9862     case OMPD_taskwait:
9863     case OMPD_taskgroup:
9864     case OMPD_atomic:
9865     case OMPD_flush:
9866     case OMPD_depobj:
9867     case OMPD_scan:
9868     case OMPD_teams:
9869     case OMPD_target_data:
9870     case OMPD_target_exit_data:
9871     case OMPD_target_enter_data:
9872     case OMPD_distribute:
9873     case OMPD_distribute_simd:
9874     case OMPD_distribute_parallel_for:
9875     case OMPD_distribute_parallel_for_simd:
9876     case OMPD_teams_distribute:
9877     case OMPD_teams_distribute_simd:
9878     case OMPD_teams_distribute_parallel_for:
9879     case OMPD_teams_distribute_parallel_for_simd:
9880     case OMPD_target_update:
9881     case OMPD_declare_simd:
9882     case OMPD_declare_variant:
9883     case OMPD_begin_declare_variant:
9884     case OMPD_end_declare_variant:
9885     case OMPD_declare_target:
9886     case OMPD_end_declare_target:
9887     case OMPD_declare_reduction:
9888     case OMPD_declare_mapper:
9889     case OMPD_taskloop:
9890     case OMPD_taskloop_simd:
9891     case OMPD_master_taskloop:
9892     case OMPD_master_taskloop_simd:
9893     case OMPD_parallel_master_taskloop:
9894     case OMPD_parallel_master_taskloop_simd:
9895     case OMPD_requires:
9896     case OMPD_metadirective:
9897     case OMPD_unknown:
9898     default:
9899       llvm_unreachable("Unexpected directive.");
9900     }
9901   }
9902 
9903   return nullptr;
9904 }
9905 
9906 /// Emit the user-defined mapper function. The code generation follows the
9907 /// pattern in the example below.
9908 /// \code
9909 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9910 ///                                           void *base, void *begin,
9911 ///                                           int64_t size, int64_t type,
9912 ///                                           void *name = nullptr) {
9913 ///   // Allocate space for an array section first or add a base/begin for
9914 ///   // pointer dereference.
9915 ///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9916 ///       !maptype.IsDelete)
9917 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9918 ///                                 size*sizeof(Ty), clearToFromMember(type));
9919 ///   // Map members.
9920 ///   for (unsigned i = 0; i < size; i++) {
9921 ///     // For each component specified by this mapper:
9922 ///     for (auto c : begin[i]->all_components) {
9923 ///       if (c.hasMapper())
9924 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9925 ///                       c.arg_type, c.arg_name);
9926 ///       else
9927 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9928 ///                                     c.arg_begin, c.arg_size, c.arg_type,
9929 ///                                     c.arg_name);
9930 ///     }
9931 ///   }
9932 ///   // Delete the array section.
9933 ///   if (size > 1 && maptype.IsDelete)
9934 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9935 ///                                 size*sizeof(Ty), clearToFromMember(type));
9936 /// }
9937 /// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Mapper functions are emitted lazily and memoized in UDMMap; nothing to do
  // if one has already been generated for \p D.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  // The mapper walks the mapped array through a restrict-qualified pointer to
  // the declared type.
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  // The variable declared in the 'declare mapper' directive; it is privatized
  // below so that the map clauses of \p D are evaluated per array element.
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes. The parameter order
  // (handle, base, begin, size, type, name) matches the signature shown in the
  // documentation block above this function.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Name the function "omp_mapper.<mangled type>.<mapper name>" so distinct
  // mappers for the same type do not collide.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Allow the mapper to be optimized even at -O0.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  // The 'size' argument arrives in bytes.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initialization and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  // Exact division: the runtime always passes a multiple of the element size.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(
      PtrBegin->getType()->getPointerElementType(), PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initialization if this is an array section and \p MapType
  // indicates that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a loop that iterates over all Size elements and maps each of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied; skip the loop
  // entirely for an empty section.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  // LastBB tracks the block the loop back-edge leaves from; it is updated
  // below because the per-element codegen creates additional blocks.
  llvm::BasicBlock *LastBB = BodyBB;
  // PHI over the current element pointer: PtrBegin on entry, PtrNext on the
  // back-edge (second incoming added at the bottom of the loop).
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the count into the MEMBER_OF bit-field position so that the
  // MEMBER_OF indices of the components emitted here come after any
  // pre-existing components.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // Only materialize mapping names when debug info is requested.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    // NUW add is safe here: the MEMBER_OF bit-field of OriMapType is being
    // rebased by the shifted component count computed above.
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing (the ToElseBB->EndBB edge carries the
    // unmodified MemberMapType into the PHI below).
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Merge the four possible decayed map types.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  // Close the loop: the back-edge comes from the last block the per-element
  // codegen emitted, not necessarily BodyBB.
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the emitted mapper so subsequent requests reuse it, and remember
  // which function (if any) triggered its emission.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
10186 
10187 /// Emit the array initialization or deletion portion for user-defined mapper
10188 /// code generation. First, it evaluates whether an array section is mapped and
10189 /// whether the \a MapType instructs to delete this section. If \a IsInit is
10190 /// true, and \a MapType indicates to not delete this array, array
10191 /// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  // Block names carry ".init" or ".del" so the emitted IR is distinguishable.
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section, i.e. more than one element
  // (\p Size is the element count, not a byte count).
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    // NOTE(review): despite its name, BaseIsBegin is true when the pointers
    // DIFFER (non-zero pointer difference) — matches the "base != begin"
    // check in the pseudo-code documented above emitUserDefinedMapper.
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
        MapperCGF.Builder.CreatePtrDiff(Base, Begin));
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    // Initialize when it is an array section OR a pointer dereference
    // (base != begin with PTR_AND_OBJ set) ...
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    // ... and the map type does NOT request deletion.
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    // Delete only array sections, and only when deletion is requested.
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  // Mark the whole-array entry as implicitly generated.
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
10255 
10256 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10257     const OMPDeclareMapperDecl *D) {
10258   auto I = UDMMap.find(D);
10259   if (I != UDMMap.end())
10260     return I->second;
10261   emitUserDefinedMapper(D);
10262   return UDMMap.lookup(D);
10263 }
10264 
10265 void CGOpenMPRuntime::emitTargetNumIterationsCall(
10266     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10267     llvm::Value *DeviceID,
10268     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10269                                      const OMPLoopDirective &D)>
10270         SizeEmitter) {
10271   OpenMPDirectiveKind Kind = D.getDirectiveKind();
10272   const OMPExecutableDirective *TD = &D;
10273   // Get nested teams distribute kind directive, if any.
10274   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
10275     TD = getNestedDistributeDirective(CGM.getContext(), D);
10276   if (!TD)
10277     return;
10278   const auto *LD = cast<OMPLoopDirective>(TD);
10279   auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
10280                                                          PrePostActionTy &) {
10281     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
10282       llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10283       llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
10284       CGF.EmitRuntimeCall(
10285           OMPBuilder.getOrCreateRuntimeFunction(
10286               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
10287           Args);
10288     }
10289   };
10290   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
10291 }
10292 
10293 void CGOpenMPRuntime::emitTargetCall(
10294     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10295     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
10296     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10297     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10298                                      const OMPLoopDirective &D)>
10299         SizeEmitter) {
10300   if (!CGF.HaveInsertPoint())
10301     return;
10302 
10303   assert(OutlinedFn && "Invalid outlined function!");
10304 
10305   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10306                                  D.hasClausesOfKind<OMPNowaitClause>();
10307   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
10308   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
10309   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
10310                                             PrePostActionTy &) {
10311     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10312   };
10313   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
10314 
10315   CodeGenFunction::OMPTargetDataInfo InputInfo;
10316   llvm::Value *MapTypesArray = nullptr;
10317   llvm::Value *MapNamesArray = nullptr;
10318   // Fill up the pointer arrays and transfer execution to the device.
10319   auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
10320                     &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
10321                     &CapturedVars,
10322                     SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
10323     if (Device.getInt() == OMPC_DEVICE_ancestor) {
10324       // Reverse offloading is not supported, so just execute on the host.
10325       if (RequiresOuterTask) {
10326         CapturedVars.clear();
10327         CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10328       }
10329       emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10330       return;
10331     }
10332 
10333     // On top of the arrays that were filled up, the target offloading call
10334     // takes as arguments the device id as well as the host pointer. The host
10335     // pointer is used by the runtime library to identify the current target
10336     // region, so it only has to be unique and not necessarily point to
10337     // anything. It could be the pointer to the outlined function that
10338     // implements the target region, but we aren't using that so that the
10339     // compiler doesn't need to keep that, and could therefore inline the host
10340     // function if proven worthwhile during optimization.
10341 
10342     // From this point on, we need to have an ID of the target region defined.
10343     assert(OutlinedFnID && "Invalid outlined function ID!");
10344 
10345     // Emit device ID if any.
10346     llvm::Value *DeviceID;
10347     if (Device.getPointer()) {
10348       assert((Device.getInt() == OMPC_DEVICE_unknown ||
10349               Device.getInt() == OMPC_DEVICE_device_num) &&
10350              "Expected device_num modifier.");
10351       llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10352       DeviceID =
10353           CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10354     } else {
10355       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10356     }
10357 
10358     // Emit the number of elements in the offloading arrays.
10359     llvm::Value *PointerNum =
10360         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10361 
10362     // Return value of the runtime offloading call.
10363     llvm::Value *Return;
10364 
10365     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
10366     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
10367 
10368     // Source location for the ident struct
10369     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10370 
10371     // Emit tripcount for the target loop-based directive.
10372     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
10373 
10374     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10375     // The target region is an outlined function launched by the runtime
10376     // via calls __tgt_target() or __tgt_target_teams().
10377     //
10378     // __tgt_target() launches a target region with one team and one thread,
10379     // executing a serial region.  This master thread may in turn launch
10380     // more threads within its team upon encountering a parallel region,
10381     // however, no additional teams can be launched on the device.
10382     //
10383     // __tgt_target_teams() launches a target region with one or more teams,
10384     // each with one or more threads.  This call is required for target
10385     // constructs such as:
10386     //  'target teams'
10387     //  'target' / 'teams'
10388     //  'target teams distribute parallel for'
10389     //  'target parallel'
10390     // and so on.
10391     //
10392     // Note that on the host and CPU targets, the runtime implementation of
10393     // these calls simply call the outlined function without forking threads.
10394     // The outlined functions themselves have runtime calls to
10395     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
10396     // the compiler in emitTeamsCall() and emitParallelCall().
10397     //
10398     // In contrast, on the NVPTX target, the implementation of
10399     // __tgt_target_teams() launches a GPU kernel with the requested number
10400     // of teams and threads so no additional calls to the runtime are required.
10401     if (NumTeams) {
10402       // If we have NumTeams defined this means that we have an enclosed teams
10403       // region. Therefore we also expect to have NumThreads defined. These two
10404       // values should be defined in the presence of a teams directive,
10405       // regardless of having any clauses associated. If the user is using teams
10406       // but no clauses, these two values will be the default that should be
10407       // passed to the runtime library - a 32-bit integer with the value zero.
10408       assert(NumThreads && "Thread limit expression should be available along "
10409                            "with number of teams.");
10410       SmallVector<llvm::Value *> OffloadingArgs = {
10411           RTLoc,
10412           DeviceID,
10413           OutlinedFnID,
10414           PointerNum,
10415           InputInfo.BasePointersArray.getPointer(),
10416           InputInfo.PointersArray.getPointer(),
10417           InputInfo.SizesArray.getPointer(),
10418           MapTypesArray,
10419           MapNamesArray,
10420           InputInfo.MappersArray.getPointer(),
10421           NumTeams,
10422           NumThreads};
10423       if (HasNowait) {
10424         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10425         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10426         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10427         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10428         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10429         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10430       }
10431       Return = CGF.EmitRuntimeCall(
10432           OMPBuilder.getOrCreateRuntimeFunction(
10433               CGM.getModule(), HasNowait
10434                                    ? OMPRTL___tgt_target_teams_nowait_mapper
10435                                    : OMPRTL___tgt_target_teams_mapper),
10436           OffloadingArgs);
10437     } else {
10438       SmallVector<llvm::Value *> OffloadingArgs = {
10439           RTLoc,
10440           DeviceID,
10441           OutlinedFnID,
10442           PointerNum,
10443           InputInfo.BasePointersArray.getPointer(),
10444           InputInfo.PointersArray.getPointer(),
10445           InputInfo.SizesArray.getPointer(),
10446           MapTypesArray,
10447           MapNamesArray,
10448           InputInfo.MappersArray.getPointer()};
10449       if (HasNowait) {
10450         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10451         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10452         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10453         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10454         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10455         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10456       }
10457       Return = CGF.EmitRuntimeCall(
10458           OMPBuilder.getOrCreateRuntimeFunction(
10459               CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
10460                                          : OMPRTL___tgt_target_mapper),
10461           OffloadingArgs);
10462     }
10463 
10464     // Check the error code and execute the host version if required.
10465     llvm::BasicBlock *OffloadFailedBlock =
10466         CGF.createBasicBlock("omp_offload.failed");
10467     llvm::BasicBlock *OffloadContBlock =
10468         CGF.createBasicBlock("omp_offload.cont");
10469     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
10470     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
10471 
10472     CGF.EmitBlock(OffloadFailedBlock);
10473     if (RequiresOuterTask) {
10474       CapturedVars.clear();
10475       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10476     }
10477     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10478     CGF.EmitBranch(OffloadContBlock);
10479 
10480     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
10481   };
10482 
10483   // Notify that the host version must be executed.
10484   auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
10485                     RequiresOuterTask](CodeGenFunction &CGF,
10486                                        PrePostActionTy &) {
10487     if (RequiresOuterTask) {
10488       CapturedVars.clear();
10489       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10490     }
10491     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10492   };
10493 
10494   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10495                           &MapNamesArray, &CapturedVars, RequiresOuterTask,
10496                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
10497     // Fill up the arrays with all the captured variables.
10498     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10499 
10500     // Get mappable expression information.
10501     MappableExprsHandler MEHandler(D, CGF);
10502     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10503     llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10504 
10505     auto RI = CS.getCapturedRecordDecl()->field_begin();
10506     auto *CV = CapturedVars.begin();
10507     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10508                                               CE = CS.capture_end();
10509          CI != CE; ++CI, ++RI, ++CV) {
10510       MappableExprsHandler::MapCombinedInfoTy CurInfo;
10511       MappableExprsHandler::StructRangeInfoTy PartialStruct;
10512 
10513       // VLA sizes are passed to the outlined region by copy and do not have map
10514       // information associated.
10515       if (CI->capturesVariableArrayType()) {
10516         CurInfo.Exprs.push_back(nullptr);
10517         CurInfo.BasePointers.push_back(*CV);
10518         CurInfo.Pointers.push_back(*CV);
10519         CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10520             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10521         // Copy to the device as an argument. No need to retrieve it.
10522         CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
10523                                 MappableExprsHandler::OMP_MAP_TARGET_PARAM |
10524                                 MappableExprsHandler::OMP_MAP_IMPLICIT);
10525         CurInfo.Mappers.push_back(nullptr);
10526       } else {
10527         // If we have any information in the map clause, we use it, otherwise we
10528         // just do a default mapping.
10529         MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
10530         if (!CI->capturesThis())
10531           MappedVarSet.insert(CI->getCapturedVar());
10532         else
10533           MappedVarSet.insert(nullptr);
10534         if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
10535           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10536         // Generate correct mapping for variables captured by reference in
10537         // lambdas.
10538         if (CI->capturesVariable())
10539           MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10540                                                   CurInfo, LambdaPointers);
10541       }
10542       // We expect to have at least an element of information for this capture.
10543       assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
10544              "Non-existing map pointer for capture!");
10545       assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10546              CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10547              CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10548              CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10549              "Inconsistent map information sizes!");
10550 
10551       // If there is an entry in PartialStruct it means we have a struct with
10552       // individual members mapped. Emit an extra combined entry.
10553       if (PartialStruct.Base.isValid()) {
10554         CombinedInfo.append(PartialStruct.PreliminaryMapData);
10555         MEHandler.emitCombinedEntry(
10556             CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
10557             !PartialStruct.PreliminaryMapData.BasePointers.empty());
10558       }
10559 
10560       // We need to append the results of this capture to what we already have.
10561       CombinedInfo.append(CurInfo);
10562     }
10563     // Adjust MEMBER_OF flags for the lambdas captures.
10564     MEHandler.adjustMemberOfForLambdaCaptures(
10565         LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
10566         CombinedInfo.Types);
10567     // Map any list items in a map clause that were not captures because they
10568     // weren't referenced within the construct.
10569     MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
10570 
10571     TargetDataInfo Info;
10572     // Fill up the arrays and create the arguments.
10573     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
10574     emitOffloadingArraysArgument(
10575         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
10576         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
10577         {/*ForEndTask=*/false});
10578 
10579     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10580     InputInfo.BasePointersArray =
10581         Address(Info.BasePointersArray, CGM.getPointerAlign());
10582     InputInfo.PointersArray =
10583         Address(Info.PointersArray, CGM.getPointerAlign());
10584     InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
10585     InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
10586     MapTypesArray = Info.MapTypesArray;
10587     MapNamesArray = Info.MapNamesArray;
10588     if (RequiresOuterTask)
10589       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10590     else
10591       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10592   };
10593 
10594   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10595                              CodeGenFunction &CGF, PrePostActionTy &) {
10596     if (RequiresOuterTask) {
10597       CodeGenFunction::OMPTargetDataInfo InputInfo;
10598       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10599     } else {
10600       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10601     }
10602   };
10603 
10604   // If we have a target function ID it means that we need to support
10605   // offloading, otherwise, just execute on the host. We need to execute on host
10606   // regardless of the conditional in the if clause if, e.g., the user do not
10607   // specify target triples.
10608   if (OutlinedFnID) {
10609     if (IfCond) {
10610       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10611     } else {
10612       RegionCodeGenTy ThenRCG(TargetThenGen);
10613       ThenRCG(CGF);
10614     }
10615   } else {
10616     RegionCodeGenTy ElseRCG(TargetElseGen);
10617     ElseRCG(CGF);
10618   }
10619 }
10620 
/// Recursively walk \p S looking for OpenMP target execution directives and
/// emit a device kernel for each one found. \p ParentName is the mangled name
/// of the enclosing function/ctor/dtor and is used as the basis of the kernel
/// name mangling so entries are unique per host function.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // Build the (device, file, line) triple that uniquely identifies this
    // target region entry point.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the directive-specific device-function emitter.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // The remaining directive kinds are not target execution directives and
    // are filtered out by the isOpenMPTargetExecutionDirective() check above;
    // reaching any of them here is a logic error.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    // Target regions do not nest further for scanning purposes; stop here.
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    // Scan only the directive's raw associated statement; captured-decl
    // wrappers are skipped by getRawStmt().
    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
10771 
10772 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10773   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10774       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10775   if (!DevTy)
10776     return false;
10777   // Do not emit device_type(nohost) functions for the host.
10778   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10779     return true;
10780   // Do not emit device_type(host) functions for the device.
10781   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10782     return true;
10783   return false;
10784 }
10785 
10786 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10787   // If emitting code for the host, we do not process FD here. Instead we do
10788   // the normal code generation.
10789   if (!CGM.getLangOpts().OpenMPIsDevice) {
10790     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10791       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10792                                   CGM.getLangOpts().OpenMPIsDevice))
10793         return true;
10794     return false;
10795   }
10796 
10797   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10798   // Try to detect target regions in the function.
10799   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10800     StringRef Name = CGM.getMangledName(GD);
10801     scanForTargetRegionsFunctions(FD->getBody(), Name);
10802     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10803                                 CGM.getLangOpts().OpenMPIsDevice))
10804       return true;
10805   }
10806 
10807   // Do not to emit function if it is not marked as declare target.
10808   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10809          AlreadyEmittedTargetDecls.count(VD) == 0;
10810 }
10811 
10812 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10813   if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10814                               CGM.getLangOpts().OpenMPIsDevice))
10815     return true;
10816 
10817   if (!CGM.getLangOpts().OpenMPIsDevice)
10818     return false;
10819 
10820   // Check if there are Ctors/Dtors in this declaration and look for target
10821   // regions in it. We use the complete variant to produce the kernel name
10822   // mangling.
10823   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10824   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10825     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10826       StringRef ParentName =
10827           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10828       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10829     }
10830     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10831       StringRef ParentName =
10832           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10833       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10834     }
10835   }
10836 
10837   // Do not to emit variable if it is not marked as declare target.
10838   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10839       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10840           cast<VarDecl>(GD.getDecl()));
10841   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10842       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10843        HasRequiresUnifiedSharedMemory)) {
10844     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10845     return true;
10846   }
10847   return false;
10848 }
10849 
10850 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10851                                                    llvm::Constant *Addr) {
10852   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10853       !CGM.getLangOpts().OpenMPIsDevice)
10854     return;
10855 
10856   // If we have host/nohost variables, they do not need to be registered.
10857   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10858       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10859   if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
10860     return;
10861 
10862   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10863       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10864   if (!Res) {
10865     if (CGM.getLangOpts().OpenMPIsDevice) {
10866       // Register non-target variables being emitted in device code (debug info
10867       // may cause this).
10868       StringRef VarName = CGM.getMangledName(VD);
10869       EmittedNonTargetVariables.try_emplace(VarName, Addr);
10870     }
10871     return;
10872   }
10873   // Register declare target variables.
10874   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10875   StringRef VarName;
10876   CharUnits VarSize;
10877   llvm::GlobalValue::LinkageTypes Linkage;
10878 
10879   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10880       !HasRequiresUnifiedSharedMemory) {
10881     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10882     VarName = CGM.getMangledName(VD);
10883     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10884       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10885       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10886     } else {
10887       VarSize = CharUnits::Zero();
10888     }
10889     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10890     // Temp solution to prevent optimizations of the internal variables.
10891     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10892       // Do not create a "ref-variable" if the original is not also available
10893       // on the host.
10894       if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
10895         return;
10896       std::string RefName = getName({VarName, "ref"});
10897       if (!CGM.GetGlobalValue(RefName)) {
10898         llvm::Constant *AddrRef =
10899             getOrCreateInternalVariable(Addr->getType(), RefName);
10900         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10901         GVAddrRef->setConstant(/*Val=*/true);
10902         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10903         GVAddrRef->setInitializer(Addr);
10904         CGM.addCompilerUsedGlobal(GVAddrRef);
10905       }
10906     }
10907   } else {
10908     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10909             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10910              HasRequiresUnifiedSharedMemory)) &&
10911            "Declare target attribute must link or to with unified memory.");
10912     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10913       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10914     else
10915       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10916 
10917     if (CGM.getLangOpts().OpenMPIsDevice) {
10918       VarName = Addr->getName();
10919       Addr = nullptr;
10920     } else {
10921       VarName = getAddrOfDeclareTargetVar(VD).getName();
10922       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10923     }
10924     VarSize = CGM.getPointerSize();
10925     Linkage = llvm::GlobalValue::WeakAnyLinkage;
10926   }
10927 
10928   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10929       VarName, Addr, VarSize, Flags, Linkage);
10930 }
10931 
10932 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10933   if (isa<FunctionDecl>(GD.getDecl()) ||
10934       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10935     return emitTargetFunctions(GD);
10936 
10937   return emitTargetGlobalVariable(GD);
10938 }
10939 
10940 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10941   for (const VarDecl *VD : DeferredGlobalVariables) {
10942     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10943         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10944     if (!Res)
10945       continue;
10946     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10947         !HasRequiresUnifiedSharedMemory) {
10948       CGM.EmitGlobal(VD);
10949     } else {
10950       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10951               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10952                HasRequiresUnifiedSharedMemory)) &&
10953              "Expected link clause or to clause with unified memory.");
10954       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10955     }
10956   }
10957 }
10958 
10959 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10960     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10961   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10962          " Expected target-based directive.");
10963 }
10964 
10965 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10966   for (const OMPClause *Clause : D->clauselists()) {
10967     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10968       HasRequiresUnifiedSharedMemory = true;
10969     } else if (const auto *AC =
10970                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10971       switch (AC->getAtomicDefaultMemOrderKind()) {
10972       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10973         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10974         break;
10975       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10976         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10977         break;
10978       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10979         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10980         break;
10981       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10982         break;
10983       }
10984     }
10985   }
10986 }
10987 
/// Returns the default atomic ordering recorded from any 'requires
/// atomic_default_mem_order' clause (see processRequiresDirective).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10991 
10992 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10993                                                        LangAS &AS) {
10994   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10995     return false;
10996   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10997   switch(A->getAllocatorType()) {
10998   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10999   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
11000   // Not supported, fallback to the default mem space.
11001   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
11002   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
11003   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
11004   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
11005   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
11006   case OMPAllocateDeclAttr::OMPConstMemAlloc:
11007   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
11008     AS = LangAS::Default;
11009     return true;
11010   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
11011     llvm_unreachable("Expected predefined allocator for the variables with the "
11012                      "static storage.");
11013   }
11014   return false;
11015 }
11016 
/// Returns whether a 'requires unified_shared_memory' clause was recorded
/// (see processRequiresDirective).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
11020 
11021 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
11022     CodeGenModule &CGM)
11023     : CGM(CGM) {
11024   if (CGM.getLangOpts().OpenMPIsDevice) {
11025     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
11026     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
11027   }
11028 }
11029 
11030 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
11031   if (CGM.getLangOpts().OpenMPIsDevice)
11032     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
11033 }
11034 
11035 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
11036   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
11037     return true;
11038 
11039   const auto *D = cast<FunctionDecl>(GD.getDecl());
11040   // Do not to emit function if it is marked as declare target as it was already
11041   // emitted.
11042   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
11043     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
11044       if (auto *F = dyn_cast_or_null<llvm::Function>(
11045               CGM.GetGlobalValue(CGM.getMangledName(GD))))
11046         return !F->isDeclaration();
11047       return false;
11048     }
11049     return true;
11050   }
11051 
11052   return !AlreadyEmittedTargetDecls.insert(D).second;
11053 }
11054 
/// Create the global-init-style function that registers this translation
/// unit's 'requires' clauses with the offload runtime via
/// __tgt_register_requires, or return nullptr when no registration is needed
/// (no target triples, simd-only mode, device compilation, or no target
/// entries/regions emitted).
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    // Build a nullary void function named ".omp_offloading.requires_reg".
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    // Emit the body: a single call __tgt_register_requires(Flags).
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
11096 
11097 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
11098                                     const OMPExecutableDirective &D,
11099                                     SourceLocation Loc,
11100                                     llvm::Function *OutlinedFn,
11101                                     ArrayRef<llvm::Value *> CapturedVars) {
11102   if (!CGF.HaveInsertPoint())
11103     return;
11104 
11105   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11106   CodeGenFunction::RunCleanupsScope Scope(CGF);
11107 
11108   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11109   llvm::Value *Args[] = {
11110       RTLoc,
11111       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11112       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
11113   llvm::SmallVector<llvm::Value *, 16> RealArgs;
11114   RealArgs.append(std::begin(Args), std::end(Args));
11115   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11116 
11117   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11118       CGM.getModule(), OMPRTL___kmpc_fork_teams);
11119   CGF.EmitRuntimeCall(RTLFn, RealArgs);
11120 }
11121 
/// Emit a call to __kmpc_push_num_teams carrying the 'num_teams' and
/// 'thread_limit' clause values; an absent clause is encoded as 0.
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  // Evaluate the clause expressions (if present) as signed i32; 0 otherwise.
  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}
11150 
11151 void CGOpenMPRuntime::emitTargetDataCalls(
11152     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11153     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
11154   if (!CGF.HaveInsertPoint())
11155     return;
11156 
11157   // Action used to replace the default codegen action and turn privatization
11158   // off.
11159   PrePostActionTy NoPrivAction;
11160 
11161   // Generate the code for the opening of the data environment. Capture all the
11162   // arguments of the runtime call by reference because they are used in the
11163   // closing of the region.
11164   auto &&BeginThenGen = [this, &D, Device, &Info,
11165                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
11166     // Fill up the arrays with all the mapped variables.
11167     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11168 
11169     // Get map clause information.
11170     MappableExprsHandler MEHandler(D, CGF);
11171     MEHandler.generateAllInfo(CombinedInfo);
11172 
11173     // Fill up the arrays and create the arguments.
11174     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11175                          /*IsNonContiguous=*/true);
11176 
11177     llvm::Value *BasePointersArrayArg = nullptr;
11178     llvm::Value *PointersArrayArg = nullptr;
11179     llvm::Value *SizesArrayArg = nullptr;
11180     llvm::Value *MapTypesArrayArg = nullptr;
11181     llvm::Value *MapNamesArrayArg = nullptr;
11182     llvm::Value *MappersArrayArg = nullptr;
11183     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11184                                  SizesArrayArg, MapTypesArrayArg,
11185                                  MapNamesArrayArg, MappersArrayArg, Info);
11186 
11187     // Emit device ID if any.
11188     llvm::Value *DeviceID = nullptr;
11189     if (Device) {
11190       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11191                                            CGF.Int64Ty, /*isSigned=*/true);
11192     } else {
11193       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11194     }
11195 
11196     // Emit the number of elements in the offloading arrays.
11197     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11198     //
11199     // Source location for the ident struct
11200     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11201 
11202     llvm::Value *OffloadingArgs[] = {RTLoc,
11203                                      DeviceID,
11204                                      PointerNum,
11205                                      BasePointersArrayArg,
11206                                      PointersArrayArg,
11207                                      SizesArrayArg,
11208                                      MapTypesArrayArg,
11209                                      MapNamesArrayArg,
11210                                      MappersArrayArg};
11211     CGF.EmitRuntimeCall(
11212         OMPBuilder.getOrCreateRuntimeFunction(
11213             CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
11214         OffloadingArgs);
11215 
11216     // If device pointer privatization is required, emit the body of the region
11217     // here. It will have to be duplicated: with and without privatization.
11218     if (!Info.CaptureDeviceAddrMap.empty())
11219       CodeGen(CGF);
11220   };
11221 
11222   // Generate code for the closing of the data region.
11223   auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
11224                                                 PrePostActionTy &) {
11225     assert(Info.isValid() && "Invalid data environment closing arguments.");
11226 
11227     llvm::Value *BasePointersArrayArg = nullptr;
11228     llvm::Value *PointersArrayArg = nullptr;
11229     llvm::Value *SizesArrayArg = nullptr;
11230     llvm::Value *MapTypesArrayArg = nullptr;
11231     llvm::Value *MapNamesArrayArg = nullptr;
11232     llvm::Value *MappersArrayArg = nullptr;
11233     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11234                                  SizesArrayArg, MapTypesArrayArg,
11235                                  MapNamesArrayArg, MappersArrayArg, Info,
11236                                  {/*ForEndCall=*/true});
11237 
11238     // Emit device ID if any.
11239     llvm::Value *DeviceID = nullptr;
11240     if (Device) {
11241       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11242                                            CGF.Int64Ty, /*isSigned=*/true);
11243     } else {
11244       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11245     }
11246 
11247     // Emit the number of elements in the offloading arrays.
11248     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11249 
11250     // Source location for the ident struct
11251     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11252 
11253     llvm::Value *OffloadingArgs[] = {RTLoc,
11254                                      DeviceID,
11255                                      PointerNum,
11256                                      BasePointersArrayArg,
11257                                      PointersArrayArg,
11258                                      SizesArrayArg,
11259                                      MapTypesArrayArg,
11260                                      MapNamesArrayArg,
11261                                      MappersArrayArg};
11262     CGF.EmitRuntimeCall(
11263         OMPBuilder.getOrCreateRuntimeFunction(
11264             CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
11265         OffloadingArgs);
11266   };
11267 
11268   // If we need device pointer privatization, we need to emit the body of the
11269   // region with no privatization in the 'else' branch of the conditional.
11270   // Otherwise, we don't have to do anything.
11271   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
11272                                                          PrePostActionTy &) {
11273     if (!Info.CaptureDeviceAddrMap.empty()) {
11274       CodeGen.setAction(NoPrivAction);
11275       CodeGen(CGF);
11276     }
11277   };
11278 
11279   // We don't have to do anything to close the region if the if clause evaluates
11280   // to false.
11281   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
11282 
11283   if (IfCond) {
11284     emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
11285   } else {
11286     RegionCodeGenTy RCG(BeginThenGen);
11287     RCG(CGF);
11288   }
11289 
11290   // If we don't require privatization of device pointers, we emit the body in
11291   // between the runtime calls. This avoids duplicating the body code.
11292   if (Info.CaptureDeviceAddrMap.empty()) {
11293     CodeGen.setAction(NoPrivAction);
11294     CodeGen(CGF);
11295   }
11296 
11297   if (IfCond) {
11298     emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
11299   } else {
11300     RegionCodeGenTy RCG(EndThenGen);
11301     RCG(CGF);
11302   }
11303 }
11304 
// Emits the runtime call for the standalone data-movement directives
// 'target enter data', 'target exit data' and 'target update'. The call is
// guarded by the if-clause (if any) and wrapped in an outer task when a
// depend or nowait clause is present.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // InputInfo and the two arrays below are filled in by TargetThenGen and
  // read later by ThenGen; both lambdas capture them by reference for that
  // reason.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Argument list shared by all of the __tgt_target_data_* entry points
    // selected below.
    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // All remaining directive kinds are rejected by the assert at the top of
    // this function. They are listed explicitly (instead of relying solely on
    // 'default') so that adding a new directive kind without considering this
    // switch produces a covered-enum warning.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    // A depend or nowait clause forces the runtime call into an outer task so
    // the dependences / asynchrony can be honored.
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});
    // Publish the generated arrays to ThenGen via the captured references.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // With an if-clause the runtime call is emitted only on the 'then' path;
  // nothing at all is emitted when the condition is false.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
11484 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    // Classification of the parameter; Vector is the default when no
    // uniform/aligned/linear clause names the parameter.
    ParamKindTy Kind = Vector;
    // For Linear: the (possibly rescaled) step value. For
    // LinearWithVarStride: the position of the step parameter.
    llvm::APSInt StrideOrArg;
    // Alignment from the 'aligned' clause; zero when the clause is absent.
    llvm::APSInt Alignment;
  };
} // namespace
11495 
11496 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11497                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11498   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11499   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11500   // of that clause. The VLEN value must be power of 2.
11501   // In other case the notion of the function`s "characteristic data type" (CDT)
11502   // is used to compute the vector length.
11503   // CDT is defined in the following order:
11504   //   a) For non-void function, the CDT is the return type.
11505   //   b) If the function has any non-uniform, non-linear parameters, then the
11506   //   CDT is the type of the first such parameter.
11507   //   c) If the CDT determined by a) or b) above is struct, union, or class
11508   //   type which is pass-by-value (except for the type that maps to the
11509   //   built-in complex data type), the characteristic data type is int.
11510   //   d) If none of the above three cases is applicable, the CDT is int.
11511   // The VLEN is then determined based on the CDT and the size of vector
11512   // register of that ISA for which current vector version is generated. The
11513   // VLEN is computed using the formula below:
11514   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11515   // where vector register size specified in section 3.2.1 Registers and the
11516   // Stack Frame of original AMD64 ABI document.
11517   QualType RetType = FD->getReturnType();
11518   if (RetType.isNull())
11519     return 0;
11520   ASTContext &C = FD->getASTContext();
11521   QualType CDT;
11522   if (!RetType.isNull() && !RetType->isVoidType()) {
11523     CDT = RetType;
11524   } else {
11525     unsigned Offset = 0;
11526     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11527       if (ParamAttrs[Offset].Kind == Vector)
11528         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11529       ++Offset;
11530     }
11531     if (CDT.isNull()) {
11532       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11533         if (ParamAttrs[I + Offset].Kind == Vector) {
11534           CDT = FD->getParamDecl(I)->getType();
11535           break;
11536         }
11537       }
11538     }
11539   }
11540   if (CDT.isNull())
11541     CDT = C.IntTy;
11542   CDT = CDT->getCanonicalTypeUnqualified();
11543   if (CDT->isRecordType() || CDT->isUnionType())
11544     CDT = C.IntTy;
11545   return C.getTypeSize(CDT);
11546 }
11547 
11548 static void
11549 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11550                            const llvm::APSInt &VLENVal,
11551                            ArrayRef<ParamAttrTy> ParamAttrs,
11552                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11553   struct ISADataTy {
11554     char ISA;
11555     unsigned VecRegSize;
11556   };
11557   ISADataTy ISAData[] = {
11558       {
11559           'b', 128
11560       }, // SSE
11561       {
11562           'c', 256
11563       }, // AVX
11564       {
11565           'd', 256
11566       }, // AVX2
11567       {
11568           'e', 512
11569       }, // AVX512
11570   };
11571   llvm::SmallVector<char, 2> Masked;
11572   switch (State) {
11573   case OMPDeclareSimdDeclAttr::BS_Undefined:
11574     Masked.push_back('N');
11575     Masked.push_back('M');
11576     break;
11577   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11578     Masked.push_back('N');
11579     break;
11580   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11581     Masked.push_back('M');
11582     break;
11583   }
11584   for (char Mask : Masked) {
11585     for (const ISADataTy &Data : ISAData) {
11586       SmallString<256> Buffer;
11587       llvm::raw_svector_ostream Out(Buffer);
11588       Out << "_ZGV" << Data.ISA << Mask;
11589       if (!VLENVal) {
11590         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11591         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11592         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11593       } else {
11594         Out << VLENVal;
11595       }
11596       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11597         switch (ParamAttr.Kind){
11598         case LinearWithVarStride:
11599           Out << 's' << ParamAttr.StrideOrArg;
11600           break;
11601         case Linear:
11602           Out << 'l';
11603           if (ParamAttr.StrideOrArg != 1)
11604             Out << ParamAttr.StrideOrArg;
11605           break;
11606         case Uniform:
11607           Out << 'u';
11608           break;
11609         case Vector:
11610           Out << 'v';
11611           break;
11612         }
11613         if (!!ParamAttr.Alignment)
11614           Out << 'a' << ParamAttr.Alignment;
11615       }
11616       Out << '_' << Fn->getName();
11617       Fn->addFnAttr(Out.str());
11618     }
11619   }
11620 }
11621 
// These are the functions needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11627 
11628 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11629 ///
11630 /// TODO: Need to implement the behavior for reference marked with a
11631 /// var or no linear modifiers (1.b in the section). For this, we
11632 /// need to extend ParamKindTy to support the linear modifiers.
11633 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11634   QT = QT.getCanonicalType();
11635 
11636   if (QT->isVoidType())
11637     return false;
11638 
11639   if (Kind == ParamKindTy::Uniform)
11640     return false;
11641 
11642   if (Kind == ParamKindTy::Linear)
11643     return false;
11644 
11645   // TODO: Handle linear references with modifiers
11646 
11647   if (Kind == ParamKindTy::LinearWithVarStride)
11648     return false;
11649 
11650   return true;
11651 }
11652 
11653 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11654 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11655   QT = QT.getCanonicalType();
11656   unsigned Size = C.getTypeSize(QT);
11657 
11658   // Only scalars and complex within 16 bytes wide set PVB to true.
11659   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11660     return false;
11661 
11662   if (QT->isFloatingType())
11663     return true;
11664 
11665   if (QT->isIntegerType())
11666     return true;
11667 
11668   if (QT->isPointerType())
11669     return true;
11670 
11671   // TODO: Add support for complex types (section 3.1.2, item 2).
11672 
11673   return false;
11674 }
11675 
11676 /// Computes the lane size (LS) of a return type or of an input parameter,
11677 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11678 /// TODO: Add support for references, section 3.2.1, item 1.
11679 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11680   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11681     QualType PTy = QT.getCanonicalType()->getPointeeType();
11682     if (getAArch64PBV(PTy, C))
11683       return C.getTypeSize(PTy);
11684   }
11685   if (getAArch64PBV(QT, C))
11686     return C.getTypeSize(QT);
11687 
11688   return C.getTypeSize(C.getUIntPtrType());
11689 }
11690 
11691 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11692 // signature of the scalar function, as defined in 3.2.2 of the
11693 // AAVFABI.
11694 static std::tuple<unsigned, unsigned, bool>
11695 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11696   QualType RetType = FD->getReturnType().getCanonicalType();
11697 
11698   ASTContext &C = FD->getASTContext();
11699 
11700   bool OutputBecomesInput = false;
11701 
11702   llvm::SmallVector<unsigned, 8> Sizes;
11703   if (!RetType->isVoidType()) {
11704     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11705     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11706       OutputBecomesInput = true;
11707   }
11708   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11709     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11710     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11711   }
11712 
11713   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11714   // The LS of a function parameter / return value can only be a power
11715   // of 2, starting from 8 bits, up to 128.
11716   assert(llvm::all_of(Sizes,
11717                       [](unsigned Size) {
11718                         return Size == 8 || Size == 16 || Size == 32 ||
11719                                Size == 64 || Size == 128;
11720                       }) &&
11721          "Invalid size");
11722 
11723   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11724                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11725                          OutputBecomesInput);
11726 }
11727 
11728 /// Mangle the parameter part of the vector function name according to
11729 /// their OpenMP classification. The mangling function is defined in
11730 /// section 3.5 of the AAVFABI.
11731 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11732   SmallString<256> Buffer;
11733   llvm::raw_svector_ostream Out(Buffer);
11734   for (const auto &ParamAttr : ParamAttrs) {
11735     switch (ParamAttr.Kind) {
11736     case LinearWithVarStride:
11737       Out << "ls" << ParamAttr.StrideOrArg;
11738       break;
11739     case Linear:
11740       Out << 'l';
11741       // Don't print the step value if it is not present or if it is
11742       // equal to 1.
11743       if (ParamAttr.StrideOrArg != 1)
11744         Out << ParamAttr.StrideOrArg;
11745       break;
11746     case Uniform:
11747       Out << 'u';
11748       break;
11749     case Vector:
11750       Out << 'v';
11751       break;
11752     }
11753 
11754     if (!!ParamAttr.Alignment)
11755       Out << 'a' << ParamAttr.Alignment;
11756   }
11757 
11758   return std::string(Out.str());
11759 }
11760 
11761 // Function used to add the attribute. The parameter `VLEN` is
11762 // templated to allow the use of "x" when targeting scalable functions
11763 // for SVE.
11764 template <typename T>
11765 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11766                                  char ISA, StringRef ParSeq,
11767                                  StringRef MangledName, bool OutputBecomesInput,
11768                                  llvm::Function *Fn) {
11769   SmallString<256> Buffer;
11770   llvm::raw_svector_ostream Out(Buffer);
11771   Out << Prefix << ISA << LMask << VLEN;
11772   if (OutputBecomesInput)
11773     Out << "v";
11774   Out << ParSeq << "_" << MangledName;
11775   Fn->addFnAttr(Out.str());
11776 }
11777 
11778 // Helper function to generate the Advanced SIMD names depending on
11779 // the value of the NDS when simdlen is not present.
11780 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11781                                       StringRef Prefix, char ISA,
11782                                       StringRef ParSeq, StringRef MangledName,
11783                                       bool OutputBecomesInput,
11784                                       llvm::Function *Fn) {
11785   switch (NDS) {
11786   case 8:
11787     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11788                          OutputBecomesInput, Fn);
11789     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11790                          OutputBecomesInput, Fn);
11791     break;
11792   case 16:
11793     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11794                          OutputBecomesInput, Fn);
11795     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11796                          OutputBecomesInput, Fn);
11797     break;
11798   case 32:
11799     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11800                          OutputBecomesInput, Fn);
11801     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11802                          OutputBecomesInput, Fn);
11803     break;
11804   case 64:
11805   case 128:
11806     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11807                          OutputBecomesInput, Fn);
11808     break;
11809   default:
11810     llvm_unreachable("Scalar type is too wide.");
11811   }
11812 }
11813 
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
/// \p ISA is 's' for SVE and 'n' for Advanced SIMD (asserted below);
/// \p UserVLEN is the simdlen clause value, or 0 when the clause is absent.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        // No clause: emit both the unmasked ('N') and masked ('M') variants.
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
11922 
11923 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11924                                               llvm::Function *Fn) {
11925   ASTContext &C = CGM.getContext();
11926   FD = FD->getMostRecentDecl();
11927   // Map params to their positions in function decl.
11928   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11929   if (isa<CXXMethodDecl>(FD))
11930     ParamPositions.try_emplace(FD, 0);
11931   unsigned ParamPos = ParamPositions.size();
11932   for (const ParmVarDecl *P : FD->parameters()) {
11933     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11934     ++ParamPos;
11935   }
11936   while (FD) {
11937     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11938       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11939       // Mark uniform parameters.
11940       for (const Expr *E : Attr->uniforms()) {
11941         E = E->IgnoreParenImpCasts();
11942         unsigned Pos;
11943         if (isa<CXXThisExpr>(E)) {
11944           Pos = ParamPositions[FD];
11945         } else {
11946           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11947                                 ->getCanonicalDecl();
11948           Pos = ParamPositions[PVD];
11949         }
11950         ParamAttrs[Pos].Kind = Uniform;
11951       }
11952       // Get alignment info.
11953       auto NI = Attr->alignments_begin();
11954       for (const Expr *E : Attr->aligneds()) {
11955         E = E->IgnoreParenImpCasts();
11956         unsigned Pos;
11957         QualType ParmTy;
11958         if (isa<CXXThisExpr>(E)) {
11959           Pos = ParamPositions[FD];
11960           ParmTy = E->getType();
11961         } else {
11962           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11963                                 ->getCanonicalDecl();
11964           Pos = ParamPositions[PVD];
11965           ParmTy = PVD->getType();
11966         }
11967         ParamAttrs[Pos].Alignment =
11968             (*NI)
11969                 ? (*NI)->EvaluateKnownConstInt(C)
11970                 : llvm::APSInt::getUnsigned(
11971                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11972                           .getQuantity());
11973         ++NI;
11974       }
11975       // Mark linear parameters.
11976       auto SI = Attr->steps_begin();
11977       auto MI = Attr->modifiers_begin();
11978       for (const Expr *E : Attr->linears()) {
11979         E = E->IgnoreParenImpCasts();
11980         unsigned Pos;
11981         // Rescaling factor needed to compute the linear parameter
11982         // value in the mangled name.
11983         unsigned PtrRescalingFactor = 1;
11984         if (isa<CXXThisExpr>(E)) {
11985           Pos = ParamPositions[FD];
11986         } else {
11987           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11988                                 ->getCanonicalDecl();
11989           Pos = ParamPositions[PVD];
11990           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11991             PtrRescalingFactor = CGM.getContext()
11992                                      .getTypeSizeInChars(P->getPointeeType())
11993                                      .getQuantity();
11994         }
11995         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11996         ParamAttr.Kind = Linear;
11997         // Assuming a stride of 1, for `linear` without modifiers.
11998         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11999         if (*SI) {
12000           Expr::EvalResult Result;
12001           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
12002             if (const auto *DRE =
12003                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
12004               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
12005                 ParamAttr.Kind = LinearWithVarStride;
12006                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
12007                     ParamPositions[StridePVD->getCanonicalDecl()]);
12008               }
12009             }
12010           } else {
12011             ParamAttr.StrideOrArg = Result.Val.getInt();
12012           }
12013         }
12014         // If we are using a linear clause on a pointer, we need to
12015         // rescale the value of linear_step with the byte size of the
12016         // pointee type.
12017         if (Linear == ParamAttr.Kind)
12018           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
12019         ++SI;
12020         ++MI;
12021       }
12022       llvm::APSInt VLENVal;
12023       SourceLocation ExprLoc;
12024       const Expr *VLENExpr = Attr->getSimdlen();
12025       if (VLENExpr) {
12026         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
12027         ExprLoc = VLENExpr->getExprLoc();
12028       }
12029       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
12030       if (CGM.getTriple().isX86()) {
12031         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
12032       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
12033         unsigned VLEN = VLENVal.getExtValue();
12034         StringRef MangledName = Fn->getName();
12035         if (CGM.getTarget().hasFeature("sve"))
12036           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12037                                          MangledName, 's', 128, Fn, ExprLoc);
12038         if (CGM.getTarget().hasFeature("neon"))
12039           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12040                                          MangledName, 'n', 128, Fn, ExprLoc);
12041       }
12042     }
12043     FD = FD->getPreviousDecl();
12044   }
12045 }
12046 
12047 namespace {
12048 /// Cleanup action for doacross support.
12049 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
12050 public:
12051   static const int DoacrossFinArgs = 2;
12052 
12053 private:
12054   llvm::FunctionCallee RTLFn;
12055   llvm::Value *Args[DoacrossFinArgs];
12056 
12057 public:
12058   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
12059                     ArrayRef<llvm::Value *> CallArgs)
12060       : RTLFn(RTLFn) {
12061     assert(CallArgs.size() == DoacrossFinArgs);
12062     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
12063   }
12064   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12065     if (!CGF.HaveInsertPoint())
12066       return;
12067     CGF.EmitRuntimeCall(RTLFn, Args);
12068   }
12069 };
12070 } // namespace
12071 
/// Emits doacross-loop initialization: builds a stack array of per-dimension
/// {lo, up, st} descriptors (one kmp_dim per item in \p NumIterations) and
/// passes it to __kmpc_doacross_init. A cleanup invoking
/// __kmpc_doacross_fini is pushed so finalization also runs on exceptional
/// exit paths.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    // The kmp_dim record type is built lazily on first use and cached.
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // Zero-initialize the whole array; the 'lo' field of each dimension is
  // intentionally left at 0 by the loop below.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    // Convert the iteration count to kmp_int64 if it has a narrower type.
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Schedule __kmpc_doacross_fini(loc, gtid) for region exit, covering both
  // normal control flow and exception propagation.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
12142 
/// Emits a doacross 'ordered' dependency point: evaluates the loop-counter
/// values from depend clause \p C into a temporary kmp_int64 array and calls
/// __kmpc_doacross_post for depend(source) or __kmpc_doacross_wait for
/// depend(sink).
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  // Evaluate each loop's counter expression and store it widened to int64.
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  // Common arguments: (ident_t *loc, kmp_int32 gtid, kmp_int64 *vec).
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}
12175 
12176 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12177                                llvm::FunctionCallee Callee,
12178                                ArrayRef<llvm::Value *> Args) const {
12179   assert(Loc.isValid() && "Outlined function call location must be valid.");
12180   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
12181 
12182   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12183     if (Fn->doesNotThrow()) {
12184       CGF.EmitNounwindRuntimeCall(Fn, Args);
12185       return;
12186     }
12187   }
12188   CGF.EmitRuntimeCall(Callee, Args);
12189 }
12190 
/// Emit a call to an outlined OpenMP region function. The base implementation
/// forwards directly to emitCall; device-specific runtimes presumably
/// customize this hook — confirm against CGOpenMPRuntime.h.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
12196 
12197 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12198   if (const auto *FD = dyn_cast<FunctionDecl>(D))
12199     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12200       HasEmittedDeclareTargetRegion = true;
12201 }
12202 
/// Return the address of the native parameter to use inside the outlined
/// function. In this base (host) implementation native and target parameters
/// coincide, so \p TargetParam is ignored.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
12208 
/// Return the address to be used for local variable \p VD, accounting for
/// untied-task lowering and the OpenMP 'allocate' directive. For a variable
/// carrying OMPAllocateDeclAttr with a non-default allocator, storage is
/// obtained through __kmpc_alloc and a cleanup calling __kmpc_free is pushed;
/// otherwise the address recorded for the enclosing untied task is returned
/// (which may be Address::invalid() when no special handling applies).
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  // Look up addresses captured for this variable by the enclosing untied
  // task, if the current function has an associated untied-locals map.
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // VLA-like type: size is a runtime value; round it up to the alignment.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      // Constant-size type: compute the aligned size at compile time.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is a enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    // void *addr = __kmpc_alloc(gtid, size, allocator);
    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // For untied tasks, publish the allocated pointer through the
    // task-provided slot so it survives task switches.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *Allocator;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        // __kmpc_free(gtid, addr, allocator);
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is a enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, AA->getAllocator());
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
12312 
12313 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12314                                              const VarDecl *VD) const {
12315   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12316   if (It == FunctionToUntiedTaskStackMap.end())
12317     return false;
12318   return UntiedLocalVarsStack[It->second].count(VD) > 0;
12319 }
12320 
/// Pushes the set of declarations named in 'nontemporal' clauses of loop
/// directive \p S onto NontemporalDeclsStack; the matching destructor pops
/// it. No push occurs when the directive has no nontemporal clauses.
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        // Plain variable reference.
        VD = DRE->getDecl();
      } else {
        // Otherwise it must be a member of the current class ('this->x').
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}
12346 
12347 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12348   if (!NeedToPush)
12349     return;
12350   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12351 }
12352 
12353 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12354     CodeGenFunction &CGF,
12355     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12356                           std::pair<Address, Address>> &LocalVars)
12357     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12358   if (!NeedToPush)
12359     return;
12360   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12361       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12362   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12363 }
12364 
12365 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12366   if (!NeedToPush)
12367     return;
12368   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12369 }
12370 
12371 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12372   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12373 
12374   return llvm::any_of(
12375       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12376       [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
12377 }
12378 
12379 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12380     const OMPExecutableDirective &S,
12381     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12382     const {
12383   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12384   // Vars in target/task regions must be excluded completely.
12385   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12386       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12387     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12388     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12389     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12390     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12391       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12392         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12393     }
12394   }
12395   // Exclude vars in private clauses.
12396   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12397     for (const Expr *Ref : C->varlists()) {
12398       if (!Ref->getType()->isScalarType())
12399         continue;
12400       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12401       if (!DRE)
12402         continue;
12403       NeedToCheckForLPCs.insert(DRE->getDecl());
12404     }
12405   }
12406   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12407     for (const Expr *Ref : C->varlists()) {
12408       if (!Ref->getType()->isScalarType())
12409         continue;
12410       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12411       if (!DRE)
12412         continue;
12413       NeedToCheckForLPCs.insert(DRE->getDecl());
12414     }
12415   }
12416   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12417     for (const Expr *Ref : C->varlists()) {
12418       if (!Ref->getType()->isScalarType())
12419         continue;
12420       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12421       if (!DRE)
12422         continue;
12423       NeedToCheckForLPCs.insert(DRE->getDecl());
12424     }
12425   }
12426   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12427     for (const Expr *Ref : C->varlists()) {
12428       if (!Ref->getType()->isScalarType())
12429         continue;
12430       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12431       if (!DRE)
12432         continue;
12433       NeedToCheckForLPCs.insert(DRE->getDecl());
12434     }
12435   }
12436   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12437     for (const Expr *Ref : C->varlists()) {
12438       if (!Ref->getType()->isScalarType())
12439         continue;
12440       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12441       if (!DRE)
12442         continue;
12443       NeedToCheckForLPCs.insert(DRE->getDecl());
12444     }
12445   }
12446   for (const Decl *VD : NeedToCheckForLPCs) {
12447     for (const LastprivateConditionalData &Data :
12448          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12449       if (Data.DeclToUniqueName.count(VD) > 0) {
12450         if (!Data.Disabled)
12451           NeedToAddForLPCsAsDisabled.insert(VD);
12452         break;
12453       }
12454     }
12455   }
12456 }
12457 
/// Pushes lastprivate-conditional tracking data for directive \p S when
/// OpenMP >= 5.0 and the directive carries a
/// 'lastprivate(conditional: ...)' clause; otherwise records DoNotPush.
/// \p IVLVal is the lvalue of the loop iteration variable, used later to
/// order conditional updates.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    // Map each conditional lastprivate decl to a unique global name used for
    // the "last value" storage.
    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
12489 
/// "Disable" form of the RAII: determines which tracked lastprivate
/// conditional variables must be suppressed inside region \p S (see
/// tryToDisableInnerAnalysis) and, if any, pushes a stack entry marked
/// Disabled so inner analysis skips them.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    // Unique names are irrelevant for a disabled entry; store empty strings.
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}
12508 
/// Named factory for the "disable" constructor above: suspends lastprivate
/// conditional analysis for tracked variables within region \p S.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12514 
12515 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12516   if (CGM.getLangOpts().OpenMP < 50)
12517     return;
12518   if (Action == ActionToDo::DisableLastprivateConditional) {
12519     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12520            "Expected list of disabled private vars.");
12521     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12522   }
12523   if (Action == ActionToDo::PushAsLastprivateConditional) {
12524     assert(
12525         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12526         "Expected list of lastprivate conditional vars.");
12527     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12528   }
12529 }
12530 
/// Creates (or reuses) a per-function aggregate { value, fired-flag } for the
/// lastprivate conditional variable \p VD, resets the flag to 0, and returns
/// the address of the value field to serve as the privatized copy.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First request for this VD in this function: build the implicit record
    // and a stack temporary for it.
    // NOTE(review): record name is spelled "lasprivate.conditional"
    // (missing 't') — kept as-is since it is only an internal identifier.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Cached: unpack the previously created type, fields, and base lvalue.
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Reset the fired flag: no conditional update has happened yet.
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
12565 
namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  /// Stack of lastprivate-conditional data; innermost region is searched
  /// first via llvm::reverse.
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  /// Expression referencing the tracked variable, if found.
  const Expr *FoundE = nullptr;
  /// Canonical declaration of the found variable/member.
  const Decl *FoundD = nullptr;
  /// Unique global name associated with the found declaration.
  StringRef UniqueDeclName;
  /// Iteration-variable lvalue of the region that tracks the declaration.
  LValue IVLVal;
  /// Function in which the tracking region was emitted.
  llvm::Function *FoundFn = nullptr;
  // NOTE(review): Loc appears unused within this class — confirm before
  // removing.
  SourceLocation Loc;

public:
  /// Matches references to plain variables. Returns false immediately if the
  /// innermost tracking entry for the decl is disabled.
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  /// Matches references to members of the current class ('this->x' form).
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  /// Generic traversal: recurse only into glvalue children (only those can
  /// name the variable being written).
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  /// Returns (expr, decl, unique name, IV lvalue, function) captured by the
  /// last successful visit.
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace
12636 
/// Emits the conditional-lastprivate update: if the current loop iteration
/// (read from \p IVLVal) is >= the globally recorded last update iteration,
/// store the private value from \p LVal into the global "last value" slot
/// named by \p UniqueDeclName. Outside simd-only mode the check-and-store is
/// wrapped in a critical region keyed by UniqueDeclName to serialize updates
/// across threads.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
12723 
12724 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12725                                                          const Expr *LHS) {
12726   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12727     return;
12728   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12729   if (!Checker.Visit(LHS))
12730     return;
12731   const Expr *FoundE;
12732   const Decl *FoundD;
12733   StringRef UniqueDeclName;
12734   LValue IVLVal;
12735   llvm::Function *FoundFn;
12736   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12737       Checker.getFoundData();
12738   if (FoundFn != CGF.CurFn) {
12739     // Special codegen for inner parallel regions.
12740     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12741     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12742     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12743            "Lastprivate conditional is not found in outer region.");
12744     QualType StructTy = std::get<0>(It->getSecond());
12745     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12746     LValue PrivLVal = CGF.EmitLValue(FoundE);
12747     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12748         PrivLVal.getAddress(CGF),
12749         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
12750     LValue BaseLVal =
12751         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12752     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12753     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12754                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12755                         FiredLVal, llvm::AtomicOrdering::Unordered,
12756                         /*IsVolatile=*/true, /*isInit=*/false);
12757     return;
12758   }
12759 
12760   // Private address of the lastprivate conditional in the current context.
12761   // priv_a
12762   LValue LVal = CGF.EmitLValue(FoundE);
12763   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12764                                    FoundE->getExprLoc());
12765 }
12766 
12767 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12768     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12769     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12770   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12771     return;
12772   auto Range = llvm::reverse(LastprivateConditionalStack);
12773   auto It = llvm::find_if(
12774       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12775   if (It == Range.end() || It->Fn != CGF.CurFn)
12776     return;
12777   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12778   assert(LPCI != LastprivateConditionalToTypes.end() &&
12779          "Lastprivates must be registered already.");
12780   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12781   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12782   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12783   for (const auto &Pair : It->DeclToUniqueName) {
12784     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12785     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
12786       continue;
12787     auto I = LPCI->getSecond().find(Pair.first);
12788     assert(I != LPCI->getSecond().end() &&
12789            "Lastprivate must be rehistered already.");
12790     // bool Cmp = priv_a.Fired != 0;
12791     LValue BaseLVal = std::get<3>(I->getSecond());
12792     LValue FiredLVal =
12793         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12794     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12795     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12796     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12797     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12798     // if (Cmp) {
12799     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12800     CGF.EmitBlock(ThenBB);
12801     Address Addr = CGF.GetAddrOfLocalVar(VD);
12802     LValue LVal;
12803     if (VD->getType()->isReferenceType())
12804       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12805                                            AlignmentSource::Decl);
12806     else
12807       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12808                                 AlignmentSource::Decl);
12809     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12810                                      D.getBeginLoc());
12811     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12812     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12813     // }
12814   }
12815 }
12816 
12817 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12818     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12819     SourceLocation Loc) {
12820   if (CGF.getLangOpts().OpenMP < 50)
12821     return;
12822   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12823   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12824          "Unknown lastprivate conditional variable.");
12825   StringRef UniqueName = It->second;
12826   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12827   // The variable was not updated in the region - exit.
12828   if (!GV)
12829     return;
12830   LValue LPLVal = CGF.MakeAddrLValue(
12831       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
12832   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12833   CGF.EmitStoreOfScalar(Res, PrivLVal);
12834 }
12835 
//===----------------------------------------------------------------------===//
// CGOpenMPSIMDRuntime: runtime-entry stubs for SIMD-only mode.
//
// When compiling with only 'simd' support enabled, no OpenMP runtime calls
// may be emitted. Sema is expected to reject every construct that would need
// them, so the overrides below must be unreachable; each one aborts via
// llvm_unreachable rather than silently miscompiling.
//===----------------------------------------------------------------------===//

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13010 
// Reductions are the one construct CGOpenMPSIMDRuntime actually codegens:
// only the "simple" (no runtime library involvement) flavor may appear in
// SIMD-only mode, and the base class already handles that case, so delegate.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
13019 
// The remaining CGOpenMPSIMDRuntime overrides: as with the stubs above, these
// OpenMP runtime entry points cannot be reached in SIMD-only mode, so each
// aborts via llvm_unreachable (with the single exception of emitTargetGlobal,
// noted below).

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Unlike the other overrides this does not abort: it reports that the SIMD
// runtime performed no special handling for the global, letting normal
// codegen proceed.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13142