1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/APValue.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/AST/Decl.h"
21 #include "clang/AST/OpenMPClause.h"
22 #include "clang/AST/StmtOpenMP.h"
23 #include "clang/AST/StmtVisitor.h"
24 #include "clang/Basic/BitmaskEnum.h"
25 #include "clang/Basic/FileManager.h"
26 #include "clang/Basic/OpenMPKinds.h"
27 #include "clang/Basic/SourceManager.h"
28 #include "clang/CodeGen/ConstantInitBuilder.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/SetOperations.h"
31 #include "llvm/ADT/StringExtras.h"
32 #include "llvm/Bitcode/BitcodeReader.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/IR/Value.h"
37 #include "llvm/Support/AtomicOrdering.h"
38 #include "llvm/Support/Format.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include <cassert>
41 #include <numeric>
42 
43 using namespace clang;
44 using namespace CodeGen;
45 using namespace llvm::omp;
46 
47 namespace {
48 /// Base class for handling code generation inside OpenMP regions.
49 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
50 public:
51   /// Kinds of OpenMP regions used in codegen.
52   enum CGOpenMPRegionKind {
53     /// Region with outlined function for standalone 'parallel'
54     /// directive.
55     ParallelOutlinedRegion,
56     /// Region with outlined function for standalone 'task' directive.
57     TaskOutlinedRegion,
58     /// Region for constructs that do not require function outlining,
59     /// like 'for', 'sections', 'atomic' etc. directives.
60     InlinedRegion,
61     /// Region with outlined function for standalone 'target' directive.
62     TargetRegion,
63   };
64 
65   CGOpenMPRegionInfo(const CapturedStmt &CS,
66                      const CGOpenMPRegionKind RegionKind,
67                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
68                      bool HasCancel)
69       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
70         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
71 
72   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
73                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
74                      bool HasCancel)
75       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
76         Kind(Kind), HasCancel(HasCancel) {}
77 
78   /// Get a variable or parameter for storing global thread id
79   /// inside OpenMP construct.
80   virtual const VarDecl *getThreadIDVariable() const = 0;
81 
82   /// Emit the captured statement body.
83   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
84 
85   /// Get an LValue for the current ThreadID variable.
86   /// \return LValue for thread id variable. This LValue always has type int32*.
87   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
88 
89   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
90 
91   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
92 
93   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
94 
95   bool hasCancel() const { return HasCancel; }
96 
97   static bool classof(const CGCapturedStmtInfo *Info) {
98     return Info->getKind() == CR_OpenMP;
99   }
100 
101   ~CGOpenMPRegionInfo() override = default;
102 
103 protected:
104   CGOpenMPRegionKind RegionKind;
105   RegionCodeGenTy CodeGen;
106   OpenMPDirectiveKind Kind;
107   bool HasCancel;
108 };
109 
110 /// API for captured statement code generation in OpenMP constructs.
111 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
112 public:
113   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
114                              const RegionCodeGenTy &CodeGen,
115                              OpenMPDirectiveKind Kind, bool HasCancel,
116                              StringRef HelperName)
117       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
118                            HasCancel),
119         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
120     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
121   }
122 
123   /// Get a variable or parameter for storing global thread id
124   /// inside OpenMP construct.
125   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
126 
127   /// Get the name of the capture helper.
128   StringRef getHelperName() const override { return HelperName; }
129 
130   static bool classof(const CGCapturedStmtInfo *Info) {
131     return CGOpenMPRegionInfo::classof(Info) &&
132            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
133                ParallelOutlinedRegion;
134   }
135 
136 private:
137   /// A variable or parameter storing global thread id for OpenMP
138   /// constructs.
139   const VarDecl *ThreadIDVar;
140   StringRef HelperName;
141 };
142 
143 /// API for captured statement code generation in OpenMP constructs.
144 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
145 public:
146   class UntiedTaskActionTy final : public PrePostActionTy {
147     bool Untied;
148     const VarDecl *PartIDVar;
149     const RegionCodeGenTy UntiedCodeGen;
150     llvm::SwitchInst *UntiedSwitch = nullptr;
151 
152   public:
153     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
154                        const RegionCodeGenTy &UntiedCodeGen)
155         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
156     void Enter(CodeGenFunction &CGF) override {
157       if (Untied) {
158         // Emit task switching point.
159         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
160             CGF.GetAddrOfLocalVar(PartIDVar),
161             PartIDVar->getType()->castAs<PointerType>());
162         llvm::Value *Res =
163             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
164         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
165         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
166         CGF.EmitBlock(DoneBB);
167         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
168         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
169         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
170                               CGF.Builder.GetInsertBlock());
171         emitUntiedSwitch(CGF);
172       }
173     }
174     void emitUntiedSwitch(CodeGenFunction &CGF) const {
175       if (Untied) {
176         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
177             CGF.GetAddrOfLocalVar(PartIDVar),
178             PartIDVar->getType()->castAs<PointerType>());
179         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
180                               PartIdLVal);
181         UntiedCodeGen(CGF);
182         CodeGenFunction::JumpDest CurPoint =
183             CGF.getJumpDestInCurrentScope(".untied.next.");
184         CGF.EmitBranch(CGF.ReturnBlock.getBlock());
185         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
186         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
187                               CGF.Builder.GetInsertBlock());
188         CGF.EmitBranchThroughCleanup(CurPoint);
189         CGF.EmitBlock(CurPoint.getBlock());
190       }
191     }
192     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
193   };
194   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
195                                  const VarDecl *ThreadIDVar,
196                                  const RegionCodeGenTy &CodeGen,
197                                  OpenMPDirectiveKind Kind, bool HasCancel,
198                                  const UntiedTaskActionTy &Action)
199       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
200         ThreadIDVar(ThreadIDVar), Action(Action) {
201     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
202   }
203 
204   /// Get a variable or parameter for storing global thread id
205   /// inside OpenMP construct.
206   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
207 
208   /// Get an LValue for the current ThreadID variable.
209   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
210 
211   /// Get the name of the capture helper.
212   StringRef getHelperName() const override { return ".omp_outlined."; }
213 
214   void emitUntiedSwitch(CodeGenFunction &CGF) override {
215     Action.emitUntiedSwitch(CGF);
216   }
217 
218   static bool classof(const CGCapturedStmtInfo *Info) {
219     return CGOpenMPRegionInfo::classof(Info) &&
220            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
221                TaskOutlinedRegion;
222   }
223 
224 private:
225   /// A variable or parameter storing global thread id for OpenMP
226   /// constructs.
227   const VarDecl *ThreadIDVar;
228   /// Action for emitting code for untied tasks.
229   const UntiedTaskActionTy &Action;
230 };
231 
232 /// API for inlined captured statement code generation in OpenMP
233 /// constructs.
234 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
235 public:
236   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
237                             const RegionCodeGenTy &CodeGen,
238                             OpenMPDirectiveKind Kind, bool HasCancel)
239       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
240         OldCSI(OldCSI),
241         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
242 
243   // Retrieve the value of the context parameter.
244   llvm::Value *getContextValue() const override {
245     if (OuterRegionInfo)
246       return OuterRegionInfo->getContextValue();
247     llvm_unreachable("No context value for inlined OpenMP region");
248   }
249 
250   void setContextValue(llvm::Value *V) override {
251     if (OuterRegionInfo) {
252       OuterRegionInfo->setContextValue(V);
253       return;
254     }
255     llvm_unreachable("No context value for inlined OpenMP region");
256   }
257 
258   /// Lookup the captured field decl for a variable.
259   const FieldDecl *lookup(const VarDecl *VD) const override {
260     if (OuterRegionInfo)
261       return OuterRegionInfo->lookup(VD);
262     // If there is no outer outlined region,no need to lookup in a list of
263     // captured variables, we can use the original one.
264     return nullptr;
265   }
266 
267   FieldDecl *getThisFieldDecl() const override {
268     if (OuterRegionInfo)
269       return OuterRegionInfo->getThisFieldDecl();
270     return nullptr;
271   }
272 
273   /// Get a variable or parameter for storing global thread id
274   /// inside OpenMP construct.
275   const VarDecl *getThreadIDVariable() const override {
276     if (OuterRegionInfo)
277       return OuterRegionInfo->getThreadIDVariable();
278     return nullptr;
279   }
280 
281   /// Get an LValue for the current ThreadID variable.
282   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
283     if (OuterRegionInfo)
284       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
285     llvm_unreachable("No LValue for inlined OpenMP construct");
286   }
287 
288   /// Get the name of the capture helper.
289   StringRef getHelperName() const override {
290     if (auto *OuterRegionInfo = getOldCSI())
291       return OuterRegionInfo->getHelperName();
292     llvm_unreachable("No helper name for inlined OpenMP construct");
293   }
294 
295   void emitUntiedSwitch(CodeGenFunction &CGF) override {
296     if (OuterRegionInfo)
297       OuterRegionInfo->emitUntiedSwitch(CGF);
298   }
299 
300   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
301 
302   static bool classof(const CGCapturedStmtInfo *Info) {
303     return CGOpenMPRegionInfo::classof(Info) &&
304            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
305   }
306 
307   ~CGOpenMPInlinedRegionInfo() override = default;
308 
309 private:
310   /// CodeGen info about outer OpenMP region.
311   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
312   CGOpenMPRegionInfo *OuterRegionInfo;
313 };
314 
315 /// API for captured statement code generation in OpenMP target
316 /// constructs. For this captures, implicit parameters are used instead of the
317 /// captured fields. The name of the target region has to be unique in a given
318 /// application so it is provided by the client, because only the client has
319 /// the information to generate that.
320 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
321 public:
322   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
323                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
324       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
325                            /*HasCancel=*/false),
326         HelperName(HelperName) {}
327 
328   /// This is unused for target regions because each starts executing
329   /// with a single thread.
330   const VarDecl *getThreadIDVariable() const override { return nullptr; }
331 
332   /// Get the name of the capture helper.
333   StringRef getHelperName() const override { return HelperName; }
334 
335   static bool classof(const CGCapturedStmtInfo *Info) {
336     return CGOpenMPRegionInfo::classof(Info) &&
337            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
338   }
339 
340 private:
341   StringRef HelperName;
342 };
343 
/// Placeholder region code generation callback that must never be invoked;
/// reaching it is treated as unreachable.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
347 /// API for generation of expressions captured in a innermost OpenMP
348 /// region.
349 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
350 public:
351   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
352       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
353                                   OMPD_unknown,
354                                   /*HasCancel=*/false),
355         PrivScope(CGF) {
356     // Make sure the globals captured in the provided statement are local by
357     // using the privatization logic. We assume the same variable is not
358     // captured more than once.
359     for (const auto &C : CS.captures()) {
360       if (!C.capturesVariable() && !C.capturesVariableByCopy())
361         continue;
362 
363       const VarDecl *VD = C.getCapturedVar();
364       if (VD->isLocalVarDeclOrParm())
365         continue;
366 
367       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
368                       /*RefersToEnclosingVariableOrCapture=*/false,
369                       VD->getType().getNonReferenceType(), VK_LValue,
370                       C.getLocation());
371       PrivScope.addPrivate(
372           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
373     }
374     (void)PrivScope.Privatize();
375   }
376 
377   /// Lookup the captured field decl for a variable.
378   const FieldDecl *lookup(const VarDecl *VD) const override {
379     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
380       return FD;
381     return nullptr;
382   }
383 
384   /// Emit the captured statement body.
385   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
386     llvm_unreachable("No body for expressions");
387   }
388 
389   /// Get a variable or parameter for storing global thread id
390   /// inside OpenMP construct.
391   const VarDecl *getThreadIDVariable() const override {
392     llvm_unreachable("No thread id for expressions");
393   }
394 
395   /// Get the name of the capture helper.
396   StringRef getHelperName() const override {
397     llvm_unreachable("No helper name for expressions");
398   }
399 
400   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
401 
402 private:
403   /// Private scope to capture global variables.
404   CodeGenFunction::OMPPrivateScope PrivScope;
405 };
406 
407 /// RAII for emitting code of OpenMP constructs.
408 class InlinedOpenMPRegionRAII {
409   CodeGenFunction &CGF;
410   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
411   FieldDecl *LambdaThisCaptureField = nullptr;
412   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
413   bool NoInheritance = false;
414 
415 public:
416   /// Constructs region for combined constructs.
417   /// \param CodeGen Code generation sequence for combined directives. Includes
418   /// a list of functions used for code generation of implicitly inlined
419   /// regions.
420   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
421                           OpenMPDirectiveKind Kind, bool HasCancel,
422                           bool NoInheritance = true)
423       : CGF(CGF), NoInheritance(NoInheritance) {
424     // Start emission for the construct.
425     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
426         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
427     if (NoInheritance) {
428       std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
429       LambdaThisCaptureField = CGF.LambdaThisCaptureField;
430       CGF.LambdaThisCaptureField = nullptr;
431       BlockInfo = CGF.BlockInfo;
432       CGF.BlockInfo = nullptr;
433     }
434   }
435 
436   ~InlinedOpenMPRegionRAII() {
437     // Restore original CapturedStmtInfo only if we're done with code emission.
438     auto *OldCSI =
439         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
440     delete CGF.CapturedStmtInfo;
441     CGF.CapturedStmtInfo = OldCSI;
442     if (NoInheritance) {
443       std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
444       CGF.LambdaThisCaptureField = LambdaThisCaptureField;
445       CGF.BlockInfo = BlockInfo;
446     }
447   }
448 };
449 
450 /// Values for bit flags used in the ident_t to describe the fields.
451 /// All enumeric elements are named and described in accordance with the code
452 /// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
453 enum OpenMPLocationFlags : unsigned {
454   /// Use trampoline for internal microtask.
455   OMP_IDENT_IMD = 0x01,
456   /// Use c-style ident structure.
457   OMP_IDENT_KMPC = 0x02,
458   /// Atomic reduction option for kmpc_reduce.
459   OMP_ATOMIC_REDUCE = 0x10,
460   /// Explicit 'barrier' directive.
461   OMP_IDENT_BARRIER_EXPL = 0x20,
462   /// Implicit barrier in code.
463   OMP_IDENT_BARRIER_IMPL = 0x40,
464   /// Implicit barrier in 'for' directive.
465   OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
466   /// Implicit barrier in 'sections' directive.
467   OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
468   /// Implicit barrier in 'single' directive.
469   OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
470   /// Call of __kmp_for_static_init for static loop.
471   OMP_IDENT_WORK_LOOP = 0x200,
472   /// Call of __kmp_for_static_init for sections.
473   OMP_IDENT_WORK_SECTIONS = 0x400,
474   /// Call of __kmp_for_static_init for distribute.
475   OMP_IDENT_WORK_DISTRIBUTE = 0x800,
476   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
477 };
478 
479 namespace {
480 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
481 /// Values for bit flags for marking which requires clauses have been used.
482 enum OpenMPOffloadingRequiresDirFlags : int64_t {
483   /// flag undefined.
484   OMP_REQ_UNDEFINED               = 0x000,
485   /// no requires clause present.
486   OMP_REQ_NONE                    = 0x001,
487   /// reverse_offload clause.
488   OMP_REQ_REVERSE_OFFLOAD         = 0x002,
489   /// unified_address clause.
490   OMP_REQ_UNIFIED_ADDRESS         = 0x004,
491   /// unified_shared_memory clause.
492   OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
493   /// dynamic_allocators clause.
494   OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
495   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
496 };
497 
/// Reserved device id values used for offloading.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
503 } // anonymous namespace
504 
/// Field indices of the ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1 = 0,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags = 1,
  /// Not really used in Fortran any more
  IdentField_Reserved_2 = 2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3 = 3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource = 4
};
545 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  // Default (unordered) versions.
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Schedule used by default; an alias of the plain static schedule.
  OMP_sch_default = OMP_sch_static,

  // 'ordered' versions.
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,

  // dist_schedule types.
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,

  // Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present (bit 29).
  OMP_sch_modifier_monotonic = 0x20000000,
  /// Set if the nonmonotonic schedule modifier was present (bit 30).
  OMP_sch_modifier_nonmonotonic = 0x40000000,
};
577 
578 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
579 /// region.
580 class CleanupTy final : public EHScopeStack::Cleanup {
581   PrePostActionTy *Action;
582 
583 public:
584   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
585   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
586     if (!CGF.HaveInsertPoint())
587       return;
588     Action->Exit(CGF);
589   }
590 };
591 
592 } // anonymous namespace
593 
594 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
595   CodeGenFunction::RunCleanupsScope Scope(CGF);
596   if (PrePostAction) {
597     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
598     Callback(CodeGen, CGF, *PrePostAction);
599   } else {
600     PrePostActionTy Action;
601     Callback(CodeGen, CGF, Action);
602   }
603 }
604 
605 /// Check if the combiner is a call to UDR combiner and if it is so return the
606 /// UDR decl used for reduction.
607 static const OMPDeclareReductionDecl *
608 getReductionInit(const Expr *ReductionOp) {
609   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
610     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
611       if (const auto *DRE =
612               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
613         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
614           return DRD;
615   return nullptr;
616 }
617 
618 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
619                                              const OMPDeclareReductionDecl *DRD,
620                                              const Expr *InitOp,
621                                              Address Private, Address Original,
622                                              QualType Ty) {
623   if (DRD->getInitializer()) {
624     std::pair<llvm::Function *, llvm::Function *> Reduction =
625         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
626     const auto *CE = cast<CallExpr>(InitOp);
627     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
628     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
629     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
630     const auto *LHSDRE =
631         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
632     const auto *RHSDRE =
633         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
634     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
635     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
636                             [=]() { return Private; });
637     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
638                             [=]() { return Original; });
639     (void)PrivateScope.Privatize();
640     RValue Func = RValue::get(Reduction.second);
641     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
642     CGF.EmitIgnoredExpr(InitOp);
643   } else {
644     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
645     std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
646     auto *GV = new llvm::GlobalVariable(
647         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
648         llvm::GlobalValue::PrivateLinkage, Init, Name);
649     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
650     RValue InitRVal;
651     switch (CGF.getEvaluationKind(Ty)) {
652     case TEK_Scalar:
653       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
654       break;
655     case TEK_Complex:
656       InitRVal =
657           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
658       break;
659     case TEK_Aggregate: {
660       OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
661       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
662       CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
663                            /*IsInitializer=*/false);
664       return;
665     }
666     }
667     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
668     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
669     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
670                          /*IsInitializer=*/false);
671   }
672 }
673 
/// Emit an element-by-element initialization loop for an array.
/// \param CGF Function the loop is emitted into.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, each element is initialized via
/// emitInitWithReductionInitializer; otherwise \p Init is emitted directly
/// into the element.
/// \param Init Initial expression of array.
/// \param DRD If non-null, \p SrcAddr is advanced in lockstep with
/// \p DestAddr.
/// \param SrcAddr Address of the original array; only used when \p DRD is
/// non-null.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Compute the one-past-the-end pointer of the destination.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for an empty array.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI over the current source element; only created when DRD is non-null.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  // PHI over the current destination element.
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Cleanups created while initializing one element are run before the
    // pointers are advanced.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the source address forward by one element.
    // NOTE(review): the value name below is the same as the one used for the
    // destination increment; renaming it would change the emitted IR names.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the destination address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
765 
766 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
767   return CGF.EmitOMPSharedLValue(E);
768 }
769 
770 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
771                                             const Expr *E) {
772   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
773     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
774   return LValue();
775 }
776 
777 void ReductionCodeGen::emitAggregateInitialization(
778     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
779     const OMPDeclareReductionDecl *DRD) {
780   // Emit VarDecl with copy init for arrays.
781   // Get the address of the original variable captured in current
782   // captured region.
783   const auto *PrivateVD =
784       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
785   bool EmitDeclareReductionInit =
786       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
787   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
788                        EmitDeclareReductionInit,
789                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
790                                                 : PrivateVD->getInit(),
791                        DRD, SharedLVal.getAddress(CGF));
792 }
793 
794 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
795                                    ArrayRef<const Expr *> Origs,
796                                    ArrayRef<const Expr *> Privates,
797                                    ArrayRef<const Expr *> ReductionOps) {
798   ClausesData.reserve(Shareds.size());
799   SharedAddresses.reserve(Shareds.size());
800   Sizes.reserve(Shareds.size());
801   BaseDecls.reserve(Shareds.size());
802   const auto *IOrig = Origs.begin();
803   const auto *IPriv = Privates.begin();
804   const auto *IRed = ReductionOps.begin();
805   for (const Expr *Ref : Shareds) {
806     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
807     std::advance(IOrig, 1);
808     std::advance(IPriv, 1);
809     std::advance(IRed, 1);
810   }
811 }
812 
813 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
814   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
815          "Number of generated lvalues must be exactly N.");
816   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
817   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
818   SharedAddresses.emplace_back(First, Second);
819   if (ClausesData[N].Shared == ClausesData[N].Ref) {
820     OrigAddresses.emplace_back(First, Second);
821   } else {
822     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
823     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
824     OrigAddresses.emplace_back(First, Second);
825   }
826 }
827 
828 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
829   const auto *PrivateVD =
830       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
831   QualType PrivateType = PrivateVD->getType();
832   bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
833   if (!PrivateType->isVariablyModifiedType()) {
834     Sizes.emplace_back(
835         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
836         nullptr);
837     return;
838   }
839   llvm::Value *Size;
840   llvm::Value *SizeInChars;
841   auto *ElemType =
842       cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
843           ->getElementType();
844   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
845   if (AsArraySection) {
846     Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
847                                      OrigAddresses[N].first.getPointer(CGF));
848     Size = CGF.Builder.CreateNUWAdd(
849         Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
850     SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
851   } else {
852     SizeInChars =
853         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
854     Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
855   }
856   Sizes.emplace_back(SizeInChars, Size);
857   CodeGenFunction::OpaqueValueMapping OpaqueMap(
858       CGF,
859       cast<OpaqueValueExpr>(
860           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
861       RValue::get(Size));
862   CGF.EmitVariablyModifiedType(PrivateType);
863 }
864 
865 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
866                                          llvm::Value *Size) {
867   const auto *PrivateVD =
868       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
869   QualType PrivateType = PrivateVD->getType();
870   if (!PrivateType->isVariablyModifiedType()) {
871     assert(!Size && !Sizes[N].second &&
872            "Size should be nullptr for non-variably modified reduction "
873            "items.");
874     return;
875   }
876   CodeGenFunction::OpaqueValueMapping OpaqueMap(
877       CGF,
878       cast<OpaqueValueExpr>(
879           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
880       RValue::get(Size));
881   CGF.EmitVariablyModifiedType(PrivateType);
882 }
883 
/// Emits the initialization of the private copy of the N-th reduction item.
///
/// \p PrivateAddr and \p SharedLVal are first re-typed (element bitcast) to
/// the memory types of the private variable and of the shared item. Then one
/// of three paths is taken:
///  - the private variable has array type: emitAggregateInitialization is
///    used, preceded by a call to \p DefaultInit when the declare-reduction
///    has an initializer;
///  - a declare-reduction exists and either has an initializer or the private
///    variable has none: \p DefaultInit is called and then
///    emitInitWithReductionInitializer;
///  - otherwise: \p DefaultInit is called, and only if it returns false and
///    the private variable has a non-trivial initializer is that initializer
///    emitted into \p PrivateAddr.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // View the private storage with the private variable's memory type.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  // Rebuild the shared lvalue with the shared item's type, keeping the base
  // info of the recorded shared lvalue.
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // The result of DefaultInit is deliberately ignored on this path.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // DefaultInit is always invoked here (it is the first operand of &&); the
    // explicit initializer is emitted only when it reported false.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
917 
918 bool ReductionCodeGen::needCleanups(unsigned N) {
919   const auto *PrivateVD =
920       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
921   QualType PrivateType = PrivateVD->getType();
922   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
923   return DTorKind != QualType::DK_none;
924 }
925 
926 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
927                                     Address PrivateAddr) {
928   const auto *PrivateVD =
929       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
930   QualType PrivateType = PrivateVD->getType();
931   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
932   if (needCleanups(N)) {
933     PrivateAddr = CGF.Builder.CreateElementBitCast(
934         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
935     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
936   }
937 }
938 
939 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
940                           LValue BaseLV) {
941   BaseTy = BaseTy.getNonReferenceType();
942   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
943          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
944     if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
945       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
946     } else {
947       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
948       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
949     }
950     BaseTy = BaseTy->getPointeeType();
951   }
952   return CGF.MakeAddrLValue(
953       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
954                                        CGF.ConvertTypeForMem(ElTy)),
955       BaseLV.getType(), BaseLV.getBaseInfo(),
956       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
957 }
958 
/// Wraps \p Addr in as many levels of indirection as \p BaseTy has
/// pointer/reference levels before it reaches \p ElTy.
///
/// For every such level a memory temporary of the current type is created;
/// each temporary's address is stored into the temporary of the level above
/// it. \p Addr, cast to the element type of the innermost temporary, is stored
/// into that innermost temporary and the outermost temporary is returned.
/// When \p BaseTy has no such level, \p Addr is cast to \p BaseLVType and
/// returned directly with alignment \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  // Tmp: temporary created in the latest iteration (innermost so far).
  // TopTmp: temporary created in the previous iteration.
  // MostTopTmp: the first (outermost) temporary, which is what gets returned.
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    // Link the new temporary into the one from the previous level, or
    // remember it as the outermost one if this is the first level.
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  // Cast Addr to what the innermost temporary holds, or to BaseLVType when no
  // temporary was needed.
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
986 
987 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
988   const VarDecl *OrigVD = nullptr;
989   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
990     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
991     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
992       Base = TempOASE->getBase()->IgnoreParenImpCasts();
993     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
994       Base = TempASE->getBase()->IgnoreParenImpCasts();
995     DE = cast<DeclRefExpr>(Base);
996     OrigVD = cast<VarDecl>(DE->getDecl());
997   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
998     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
999     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1000       Base = TempASE->getBase()->IgnoreParenImpCasts();
1001     DE = cast<DeclRefExpr>(Base);
1002     OrigVD = cast<VarDecl>(DE->getDecl());
1003   }
1004   return OrigVD;
1005 }
1006 
1007 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1008                                                Address PrivateAddr) {
1009   const DeclRefExpr *DE;
1010   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1011     BaseDecls.emplace_back(OrigVD);
1012     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1013     LValue BaseLValue =
1014         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1015                     OriginalBaseLValue);
1016     Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
1017     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1018         BaseLValue.getPointer(CGF), SharedAddr.getPointer());
1019     llvm::Value *PrivatePointer =
1020         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1021             PrivateAddr.getPointer(), SharedAddr.getType());
1022     llvm::Value *Ptr = CGF.Builder.CreateGEP(
1023         SharedAddr.getElementType(), PrivatePointer, Adjustment);
1024     return castToBase(CGF, OrigVD->getType(),
1025                       SharedAddresses[N].first.getType(),
1026                       OriginalBaseLValue.getAddress(CGF).getType(),
1027                       OriginalBaseLValue.getAlignment(), Ptr);
1028   }
1029   BaseDecls.emplace_back(
1030       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1031   return PrivateAddr;
1032 }
1033 
1034 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1035   const OMPDeclareReductionDecl *DRD =
1036       getReductionInit(ClausesData[N].ReductionOp);
1037   return DRD && DRD->getInitializer();
1038 }
1039 
1040 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1041   return CGF.EmitLoadOfPointerLValue(
1042       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1043       getThreadIDVariable()->getType()->castAs<PointerType>());
1044 }
1045 
1046 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
1047   if (!CGF.HaveInsertPoint())
1048     return;
1049   // 1.2.2 OpenMP Language Terminology
1050   // Structured block - An executable statement with a single entry at the
1051   // top and a single exit at the bottom.
1052   // The point of exit cannot be a branch out of the structured block.
1053   // longjmp() and throw() must not violate the entry/exit criteria.
1054   CGF.EHStack.pushTerminate();
1055   if (S)
1056     CGF.incrementProfileCounter(S);
1057   CodeGen(CGF);
1058   CGF.EHStack.popTerminate();
1059 }
1060 
1061 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1062     CodeGenFunction &CGF) {
1063   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1064                             getThreadIDVariable()->getType(),
1065                             AlignmentSource::Decl);
1066 }
1067 
1068 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1069                                        QualType FieldTy) {
1070   auto *Field = FieldDecl::Create(
1071       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1072       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1073       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1074   Field->setAccess(AS_public);
1075   DC->addDecl(Field);
1076   return Field;
1077 }
1078 
1079 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1080                                  StringRef Separator)
1081     : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1082       OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
1083   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1084 
1085   // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
1086   OMPBuilder.initialize();
1087   loadOffloadInfoMetadata();
1088 }
1089 
1090 void CGOpenMPRuntime::clear() {
1091   InternalVars.clear();
1092   // Clean non-target variable declarations possibly used only in debug info.
1093   for (const auto &Data : EmittedNonTargetVariables) {
1094     if (!Data.getValue().pointsToAliveValue())
1095       continue;
1096     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1097     if (!GV)
1098       continue;
1099     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1100       continue;
1101     GV->eraseFromParent();
1102   }
1103 }
1104 
1105 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1106   SmallString<128> Buffer;
1107   llvm::raw_svector_ostream OS(Buffer);
1108   StringRef Sep = FirstSeparator;
1109   for (StringRef Part : Parts) {
1110     OS << Sep << Part;
1111     Sep = Separator;
1112   }
1113   return std::string(OS.str());
1114 }
1115 
/// Emits the internal helper function for a user-defined reduction: the
/// combiner when \p IsCombiner is true, the initializer otherwise.
///
/// The function takes two restrict-qualified pointers to \p Ty. Inside it,
/// \p In and \p Out are privatized to the pointees of the corresponding
/// parameters. For an initializer, a non-trivial initializer of \p Out is
/// emitted into it first; then \p CombinerInitializer, if non-null, is emitted
/// as an ignored expression.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *omp_out_parm, Ty *omp_in_parm);
  // Note the parameter order: the 'out' parameter is added to Args first.
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // When optimizing, replace noinline/optnone with alwaysinline on the helper.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // Initializer only: emit the non-trivial initializer of Out into the
  // (privatized) Out variable before running the initializer expression.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1172 
1173 void CGOpenMPRuntime::emitUserDefinedReduction(
1174     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1175   if (UDRMap.count(D) > 0)
1176     return;
1177   llvm::Function *Combiner = emitCombinerOrInitializer(
1178       CGM, D->getType(), D->getCombiner(),
1179       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1180       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1181       /*IsCombiner=*/true);
1182   llvm::Function *Initializer = nullptr;
1183   if (const Expr *Init = D->getInitializer()) {
1184     Initializer = emitCombinerOrInitializer(
1185         CGM, D->getType(),
1186         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1187                                                                      : nullptr,
1188         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1189         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1190         /*IsCombiner=*/false);
1191   }
1192   UDRMap.try_emplace(D, Combiner, Initializer);
1193   if (CGF) {
1194     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1195     Decls.second.push_back(D);
1196   }
1197 }
1198 
1199 std::pair<llvm::Function *, llvm::Function *>
1200 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1201   auto I = UDRMap.find(D);
1202   if (I != UDRMap.end())
1203     return I->second;
1204   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1205   return UDRMap.lookup(D);
1206 }
1207 
1208 namespace {
1209 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1210 // Builder if one is present.
1211 struct PushAndPopStackRAII {
1212   PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1213                       bool HasCancel, llvm::omp::Directive Kind)
1214       : OMPBuilder(OMPBuilder) {
1215     if (!OMPBuilder)
1216       return;
1217 
1218     // The following callback is the crucial part of clangs cleanup process.
1219     //
1220     // NOTE:
1221     // Once the OpenMPIRBuilder is used to create parallel regions (and
1222     // similar), the cancellation destination (Dest below) is determined via
1223     // IP. That means if we have variables to finalize we split the block at IP,
1224     // use the new block (=BB) as destination to build a JumpDest (via
1225     // getJumpDestInCurrentScope(BB)) which then is fed to
1226     // EmitBranchThroughCleanup. Furthermore, there will not be the need
1227     // to push & pop an FinalizationInfo object.
1228     // The FiniCB will still be needed but at the point where the
1229     // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1230     auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1231       assert(IP.getBlock()->end() == IP.getPoint() &&
1232              "Clang CG should cause non-terminated block!");
1233       CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1234       CGF.Builder.restoreIP(IP);
1235       CodeGenFunction::JumpDest Dest =
1236           CGF.getOMPCancelDestination(OMPD_parallel);
1237       CGF.EmitBranchThroughCleanup(Dest);
1238     };
1239 
1240     // TODO: Remove this once we emit parallel regions through the
1241     //       OpenMPIRBuilder as it can do this setup internally.
1242     llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
1243     OMPBuilder->pushFinalizationCB(std::move(FI));
1244   }
1245   ~PushAndPopStackRAII() {
1246     if (OMPBuilder)
1247       OMPBuilder->popFinalizationCB();
1248   }
1249   llvm::OpenMPIRBuilder *OMPBuilder;
1250 };
1251 } // namespace
1252 
1253 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1254     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1255     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1256     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1257   assert(ThreadIDVar->getType()->isPointerType() &&
1258          "thread id variable must be of type kmp_int32 *");
1259   CodeGenFunction CGF(CGM, true);
1260   bool HasCancel = false;
1261   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1262     HasCancel = OPD->hasCancel();
1263   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1264     HasCancel = OPD->hasCancel();
1265   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1266     HasCancel = OPSD->hasCancel();
1267   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1268     HasCancel = OPFD->hasCancel();
1269   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1270     HasCancel = OPFD->hasCancel();
1271   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1272     HasCancel = OPFD->hasCancel();
1273   else if (const auto *OPFD =
1274                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1275     HasCancel = OPFD->hasCancel();
1276   else if (const auto *OPFD =
1277                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1278     HasCancel = OPFD->hasCancel();
1279 
1280   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1281   //       parallel region to make cancellation barriers work properly.
1282   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1283   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1284   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1285                                     HasCancel, OutlinedHelperName);
1286   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1287   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1288 }
1289 
1290 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1291     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1292     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1293   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1294   return emitParallelOrTeamsOutlinedFunction(
1295       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1296 }
1297 
1298 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1299     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1300     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1301   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1302   return emitParallelOrTeamsOutlinedFunction(
1303       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1304 }
1305 
/// Generates the outlined function for the captured statement of a task or
/// taskloop directive \p D.
///
/// An UntiedTaskActionTy built from \p Tied, \p PartIDVar and a callback that
/// emits a call to __kmpc_omp_task is installed on \p CodeGen before the
/// function is generated. \p ThreadIDVar must not have pointer type.
/// \p NumberOfParts is written only when \p Tied is false, with the value
/// reported by the action; it is left untouched for tied tasks.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Callback handed to the untied-task action: emits
  // __kmpc_omp_task(loc, thread id, task pointer loaded from TaskTVar).
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Taskloop directives capture under OMPD_taskloop, everything else under
  // OMPD_task.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Only the directive kinds below are queried for a cancel; for any other
  // kind HasCancel stays false.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // The part count is only reported for untied tasks.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1352 
1353 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1354                              const RecordDecl *RD, const CGRecordLayout &RL,
1355                              ArrayRef<llvm::Constant *> Data) {
1356   llvm::StructType *StructTy = RL.getLLVMType();
1357   unsigned PrevIdx = 0;
1358   ConstantInitBuilder CIBuilder(CGM);
1359   auto DI = Data.begin();
1360   for (const FieldDecl *FD : RD->fields()) {
1361     unsigned Idx = RL.getLLVMFieldNo(FD);
1362     // Fill the alignment.
1363     for (unsigned I = PrevIdx; I < Idx; ++I)
1364       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1365     PrevIdx = Idx + 1;
1366     Fields.add(*DI);
1367     ++DI;
1368   }
1369 }
1370 
1371 template <class... As>
1372 static llvm::GlobalVariable *
1373 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1374                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1375                    As &&... Args) {
1376   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1377   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1378   ConstantInitBuilder CIBuilder(CGM);
1379   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1380   buildStructValue(Fields, CGM, RD, RL, Data);
1381   return Fields.finishAndCreateGlobal(
1382       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1383       std::forward<As>(Args)...);
1384 }
1385 
1386 template <typename T>
1387 static void
1388 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1389                                          ArrayRef<llvm::Constant *> Data,
1390                                          T &Parent) {
1391   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1392   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1393   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1394   buildStructValue(Fields, CGM, RD, RL, Data);
1395   Fields.finishAndAddTo(Parent);
1396 }
1397 
1398 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1399                                              bool AtCurrentPoint) {
1400   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1401   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1402 
1403   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1404   if (AtCurrentPoint) {
1405     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1406         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1407   } else {
1408     Elem.second.ServiceInsertPt =
1409         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1410     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1411   }
1412 }
1413 
1414 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1415   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1416   if (Elem.second.ServiceInsertPt) {
1417     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1418     Elem.second.ServiceInsertPt = nullptr;
1419     Ptr->eraseFromParent();
1420   }
1421 }
1422 
1423 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1424                                                   SourceLocation Loc,
1425                                                   SmallString<128> &Buffer) {
1426   llvm::raw_svector_ostream OS(Buffer);
1427   // Build debug location
1428   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1429   OS << ";" << PLoc.getFilename() << ";";
1430   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1431     OS << FD->getQualifiedNameAsString();
1432   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1433   return OS.str();
1434 }
1435 
1436 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1437                                                  SourceLocation Loc,
1438                                                  unsigned Flags) {
1439   llvm::Constant *SrcLocStr;
1440   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1441       Loc.isInvalid()) {
1442     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
1443   } else {
1444     std::string FunctionName = "";
1445     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1446       FunctionName = FD->getQualifiedNameAsString();
1447     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1448     const char *FileName = PLoc.getFilename();
1449     unsigned Line = PLoc.getLine();
1450     unsigned Column = PLoc.getColumn();
1451     SrcLocStr =
1452         OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line, Column);
1453   }
1454   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1455   return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
1456                                      Reserved2Flags);
1457 }
1458 
/// Returns a value holding the OpenMP global thread id for the function that
/// is currently being emitted by \p CGF.
///
/// The lookup proceeds in this order:
///  1. With the OpenMPIRBuilder language option, the id is requested from
///     OMPBuilder using an ident built from \p Loc.
///  2. A thread id already cached for CGF.CurFn in OpenMPLocThreadIDMap is
///     returned as is.
///  3. Inside an OpenMP region that has a thread id variable, the id is loaded
///     from that variable when the conditions checked below hold.
///  4. Otherwise a call to __kmpc_global_thread_num is emitted at the
///     function's service insert point and cached for the whole function.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      // TopBlock is the block that contains the alloca insert point.
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Load from the variable if any of the following holds: no landing pad
      // is required, exceptions or C++ exceptions are disabled, we are
      // emitting into TopBlock, or the variable's pointer is not an
      // instruction or is an instruction located in TopBlock or in the
      // current insert block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call
  // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // The guard restores the builder's insert point when this function returns;
  // the call itself is emitted at the service insert point.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1526 
1527 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1528   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1529   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1530     clearLocThreadIdInsertPt(CGF);
1531     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1532   }
1533   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1534     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1535       UDRMap.erase(D);
1536     FunctionUDRMap.erase(CGF.CurFn);
1537   }
1538   auto I = FunctionUDMMap.find(CGF.CurFn);
1539   if (I != FunctionUDMMap.end()) {
1540     for(const auto *D : I->second)
1541       UDMMap.erase(D);
1542     FunctionUDMMap.erase(I);
1543   }
1544   LastprivateConditionalToTypes.erase(CGF.CurFn);
1545   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1546 }
1547 
1548 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1549   return OMPBuilder.IdentPtr;
1550 }
1551 
1552 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1553   if (!Kmpc_MicroTy) {
1554     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1555     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1556                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1557     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1558   }
1559   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1560 }
1561 
1562 llvm::FunctionCallee
1563 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1564                                              bool IsGPUDistribute) {
1565   assert((IVSize == 32 || IVSize == 64) &&
1566          "IV size is not compatible with the omp runtime");
1567   StringRef Name;
1568   if (IsGPUDistribute)
1569     Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1570                                     : "__kmpc_distribute_static_init_4u")
1571                         : (IVSigned ? "__kmpc_distribute_static_init_8"
1572                                     : "__kmpc_distribute_static_init_8u");
1573   else
1574     Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1575                                     : "__kmpc_for_static_init_4u")
1576                         : (IVSigned ? "__kmpc_for_static_init_8"
1577                                     : "__kmpc_for_static_init_8u");
1578 
1579   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1580   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1581   llvm::Type *TypeParams[] = {
1582     getIdentTyPointerTy(),                     // loc
1583     CGM.Int32Ty,                               // tid
1584     CGM.Int32Ty,                               // schedtype
1585     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1586     PtrTy,                                     // p_lower
1587     PtrTy,                                     // p_upper
1588     PtrTy,                                     // p_stride
1589     ITy,                                       // incr
1590     ITy                                        // chunk
1591   };
1592   auto *FnTy =
1593       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1594   return CGM.CreateRuntimeFunction(FnTy, Name);
1595 }
1596 
1597 llvm::FunctionCallee
1598 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1599   assert((IVSize == 32 || IVSize == 64) &&
1600          "IV size is not compatible with the omp runtime");
1601   StringRef Name =
1602       IVSize == 32
1603           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1604           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1605   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1606   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1607                                CGM.Int32Ty,           // tid
1608                                CGM.Int32Ty,           // schedtype
1609                                ITy,                   // lower
1610                                ITy,                   // upper
1611                                ITy,                   // stride
1612                                ITy                    // chunk
1613   };
1614   auto *FnTy =
1615       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1616   return CGM.CreateRuntimeFunction(FnTy, Name);
1617 }
1618 
1619 llvm::FunctionCallee
1620 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1621   assert((IVSize == 32 || IVSize == 64) &&
1622          "IV size is not compatible with the omp runtime");
1623   StringRef Name =
1624       IVSize == 32
1625           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1626           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1627   llvm::Type *TypeParams[] = {
1628       getIdentTyPointerTy(), // loc
1629       CGM.Int32Ty,           // tid
1630   };
1631   auto *FnTy =
1632       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1633   return CGM.CreateRuntimeFunction(FnTy, Name);
1634 }
1635 
1636 llvm::FunctionCallee
1637 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1638   assert((IVSize == 32 || IVSize == 64) &&
1639          "IV size is not compatible with the omp runtime");
1640   StringRef Name =
1641       IVSize == 32
1642           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1643           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1644   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1645   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1646   llvm::Type *TypeParams[] = {
1647     getIdentTyPointerTy(),                     // loc
1648     CGM.Int32Ty,                               // tid
1649     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1650     PtrTy,                                     // p_lower
1651     PtrTy,                                     // p_upper
1652     PtrTy                                      // p_stride
1653   };
1654   auto *FnTy =
1655       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1656   return CGM.CreateRuntimeFunction(FnTy, Name);
1657 }
1658 
1659 /// Obtain information that uniquely identifies a target entry. This
1660 /// consists of the file and device IDs as well as line number associated with
1661 /// the relevant entry source location.
1662 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1663                                      unsigned &DeviceID, unsigned &FileID,
1664                                      unsigned &LineNum) {
1665   SourceManager &SM = C.getSourceManager();
1666 
1667   // The loc should be always valid and have a file ID (the user cannot use
1668   // #pragma directives in macros)
1669 
1670   assert(Loc.isValid() && "Source location is expected to be always valid.");
1671 
1672   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1673   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1674 
1675   llvm::sys::fs::UniqueID ID;
1676   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1677     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1678     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1679     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1680       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1681           << PLoc.getFilename() << EC.message();
1682   }
1683 
1684   DeviceID = ID.getDevice();
1685   FileID = ID.getFile();
1686   LineNum = PLoc.getLine();
1687 }
1688 
1689 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1690   if (CGM.getLangOpts().OpenMPSimd)
1691     return Address::invalid();
1692   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1693       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1694   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
1695               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1696                HasRequiresUnifiedSharedMemory))) {
1697     SmallString<64> PtrName;
1698     {
1699       llvm::raw_svector_ostream OS(PtrName);
1700       OS << CGM.getMangledName(GlobalDecl(VD));
1701       if (!VD->isExternallyVisible()) {
1702         unsigned DeviceID, FileID, Line;
1703         getTargetEntryUniqueInfo(CGM.getContext(),
1704                                  VD->getCanonicalDecl()->getBeginLoc(),
1705                                  DeviceID, FileID, Line);
1706         OS << llvm::format("_%x", FileID);
1707       }
1708       OS << "_decl_tgt_ref_ptr";
1709     }
1710     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
1711     if (!Ptr) {
1712       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
1713       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
1714                                         PtrName);
1715 
1716       auto *GV = cast<llvm::GlobalVariable>(Ptr);
1717       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
1718 
1719       if (!CGM.getLangOpts().OpenMPIsDevice)
1720         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
1721       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
1722     }
1723     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
1724   }
1725   return Address::invalid();
1726 }
1727 
1728 llvm::Constant *
1729 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1730   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1731          !CGM.getContext().getTargetInfo().isTLSSupported());
1732   // Lookup the entry, lazily creating it if necessary.
1733   std::string Suffix = getName({"cache", ""});
1734   return getOrCreateInternalVariable(
1735       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1736 }
1737 
1738 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1739                                                 const VarDecl *VD,
1740                                                 Address VDAddr,
1741                                                 SourceLocation Loc) {
1742   if (CGM.getLangOpts().OpenMPUseTLS &&
1743       CGM.getContext().getTargetInfo().isTLSSupported())
1744     return VDAddr;
1745 
1746   llvm::Type *VarTy = VDAddr.getElementType();
1747   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1748                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1749                                                        CGM.Int8PtrTy),
1750                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1751                          getOrCreateThreadPrivateCache(VD)};
1752   return Address(CGF.EmitRuntimeCall(
1753                      OMPBuilder.getOrCreateRuntimeFunction(
1754                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1755                      Args),
1756                  VDAddr.getAlignment());
1757 }
1758 
1759 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1760     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1761     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1762   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1763   // library.
1764   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1765   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1766                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1767                       OMPLoc);
1768   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1769   // to register constructor/destructor for variable.
1770   llvm::Value *Args[] = {
1771       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1772       Ctor, CopyCtor, Dtor};
1773   CGF.EmitRuntimeCall(
1774       OMPBuilder.getOrCreateRuntimeFunction(
1775           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1776       Args);
1777 }
1778 
/// Emits the helper functions and the runtime registration needed for the
/// threadprivate variable \p VD when it is not implemented with TLS.
///
/// \param VD          The threadprivate variable; its definition is used.
/// \param VDAddr      Address of the original variable.
/// \param Loc         Location used for the generated functions and calls.
/// \param PerformInit If true (and compiling C++), a constructor function
///                    that re-emits the initializer is generated.
/// \param CGF         If non-null, the registration is emitted into this
///                    function; otherwise a separate init function is
///                    created.
/// \returns The newly created init function when \p CGF is null and a
/// constructor or destructor is required; nullptr in all other cases (TLS in
/// use, no definition, variable already handled, nothing to register, or
/// registration emitted into \p CGF).
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Process each variable only once, keyed by its mangled name.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    // NOTE(review): Init is dereferenced below without a null check;
    // presumably PerformInit implies that an initializer exists - confirm
    // against the callers.
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      // The constructor helper has the signature void *(void *).
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the destination pointer and emit the initializer into it.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Reload the argument and return it from the helper.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      // The destructor helper has the signature void (void *).
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Whichever of the constructor/destructor helpers was not generated is
    // passed to the runtime as a typed null pointer.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No function to emit into: wrap the registration in a dedicated
      // nullary init function and hand that back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration directly into the provided function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1898 
/// Emits and registers the offload constructor/destructor entries for the
/// declare target variable \p VD.
///
/// \param VD          The declare target variable; its definition is used.
/// \param Addr        Global variable that holds \p VD.
/// \param PerformInit If true (and compiling C++), a constructor entry is
///                    produced.
/// \returns false when there are no offload target triples and this is not a
/// device compilation; otherwise the value of the OpenMPIsDevice language
/// option.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Nothing is emitted for variables that are not declare target, are mapped
  // with 'link', or are mapped with 'to' under unified shared memory.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Process each variable only once, keyed by its mangled name.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  // NOTE(review): Init is dereferenced in the device branch below without a
  // null check; presumably PerformInit implies that an initializer exists -
  // confirm against the callers.
  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the storage of the variable VD (Addr).
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Add the function to the module's used globals.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: a private constant i8 global named like the device
      // constructor serves as both the entry address and its ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the storage of the
      // variable VD (Addr).
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Add the function to the module's used globals.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side: a private constant i8 global named like the device
      // destructor serves as both the entry address and its ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2013 
2014 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2015                                                           QualType VarType,
2016                                                           StringRef Name) {
2017   std::string Suffix = getName({"artificial", ""});
2018   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2019   llvm::Value *GAddr =
2020       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2021   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
2022       CGM.getTarget().isTLSSupported()) {
2023     cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
2024     return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
2025   }
2026   std::string CacheSuffix = getName({"cache", ""});
2027   llvm::Value *Args[] = {
2028       emitUpdateLocation(CGF, SourceLocation()),
2029       getThreadID(CGF, SourceLocation()),
2030       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2031       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2032                                 /*isSigned=*/false),
2033       getOrCreateInternalVariable(
2034           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2035   return Address(
2036       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2037           CGF.EmitRuntimeCall(
2038               OMPBuilder.getOrCreateRuntimeFunction(
2039                   CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
2040               Args),
2041           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2042       CGM.getContext().getTypeAlignInChars(VarType));
2043 }
2044 
2045 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2046                                    const RegionCodeGenTy &ThenGen,
2047                                    const RegionCodeGenTy &ElseGen) {
2048   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2049 
2050   // If the condition constant folds and can be elided, try to avoid emitting
2051   // the condition and the dead arm of the if/else.
2052   bool CondConstant;
2053   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2054     if (CondConstant)
2055       ThenGen(CGF);
2056     else
2057       ElseGen(CGF);
2058     return;
2059   }
2060 
2061   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2062   // emit the conditional branch.
2063   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2064   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2065   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2066   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2067 
2068   // Emit the 'then' code.
2069   CGF.EmitBlock(ThenBlock);
2070   ThenGen(CGF);
2071   CGF.EmitBranch(ContBlock);
2072   // Emit the 'else' code if present.
2073   // There is no need to emit line number for unconditional branch.
2074   (void)ApplyDebugLocation::CreateEmpty(CGF);
2075   CGF.EmitBlock(ElseBlock);
2076   ElseGen(CGF);
2077   // There is no need to emit line number for unconditional branch.
2078   (void)ApplyDebugLocation::CreateEmpty(CGF);
2079   CGF.EmitBranch(ContBlock);
2080   // Emit the continuation block for code after the if.
2081   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2082 }
2083 
2084 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2085                                        llvm::Function *OutlinedFn,
2086                                        ArrayRef<llvm::Value *> CapturedVars,
2087                                        const Expr *IfCond) {
2088   if (!CGF.HaveInsertPoint())
2089     return;
2090   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2091   auto &M = CGM.getModule();
2092   auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2093                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2094     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2095     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2096     llvm::Value *Args[] = {
2097         RTLoc,
2098         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2099         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2100     llvm::SmallVector<llvm::Value *, 16> RealArgs;
2101     RealArgs.append(std::begin(Args), std::end(Args));
2102     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2103 
2104     llvm::FunctionCallee RTLFn =
2105         OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2106     CGF.EmitRuntimeCall(RTLFn, RealArgs);
2107   };
2108   auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2109                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2110     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2111     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2112     // Build calls:
2113     // __kmpc_serialized_parallel(&Loc, GTid);
2114     llvm::Value *Args[] = {RTLoc, ThreadID};
2115     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2116                             M, OMPRTL___kmpc_serialized_parallel),
2117                         Args);
2118 
2119     // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2120     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2121     Address ZeroAddrBound =
2122         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2123                                          /*Name=*/".bound.zero.addr");
2124     CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
2125     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2126     // ThreadId for serialized parallels is 0.
2127     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2128     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2129     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2130 
2131     // Ensure we do not inline the function. This is trivially true for the ones
2132     // passed to __kmpc_fork_call but the ones called in serialized regions
2133     // could be inlined. This is not a perfect but it is closer to the invariant
2134     // we want, namely, every data environment starts with a new function.
2135     // TODO: We should pass the if condition to the runtime function and do the
2136     //       handling there. Much cleaner code.
2137     OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
2138     OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
2139     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2140 
2141     // __kmpc_end_serialized_parallel(&Loc, GTid);
2142     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2143     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2144                             M, OMPRTL___kmpc_end_serialized_parallel),
2145                         EndArgs);
2146   };
2147   if (IfCond) {
2148     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2149   } else {
2150     RegionCodeGenTy ThenRCG(ThenGen);
2151     ThenRCG(CGF);
2152   }
2153 }
2154 
2155 // If we're inside an (outlined) parallel region, use the region info's
2156 // thread-ID variable (it is passed in a first argument of the outlined function
2157 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2158 // regular serial code region, get thread ID by calling kmp_int32
2159 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2160 // return the address of that temp.
2161 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2162                                              SourceLocation Loc) {
2163   if (auto *OMPRegionInfo =
2164           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2165     if (OMPRegionInfo->getThreadIDVariable())
2166       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2167 
2168   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2169   QualType Int32Ty =
2170       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2171   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2172   CGF.EmitStoreOfScalar(ThreadID,
2173                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2174 
2175   return ThreadIDTemp;
2176 }
2177 
2178 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2179     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2180   SmallString<256> Buffer;
2181   llvm::raw_svector_ostream Out(Buffer);
2182   Out << Name;
2183   StringRef RuntimeName = Out.str();
2184   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2185   if (Elem.second) {
2186     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2187            "OMP internal variable has different type than requested");
2188     return &*Elem.second;
2189   }
2190 
2191   return Elem.second = new llvm::GlobalVariable(
2192              CGM.getModule(), Ty, /*IsConstant*/ false,
2193              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2194              Elem.first(), /*InsertBefore=*/nullptr,
2195              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2196 }
2197 
2198 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2199   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2200   std::string Name = getName({Prefix, "var"});
2201   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2202 }
2203 
2204 namespace {
2205 /// Common pre(post)-action for different OpenMP constructs.
2206 class CommonActionTy final : public PrePostActionTy {
2207   llvm::FunctionCallee EnterCallee;
2208   ArrayRef<llvm::Value *> EnterArgs;
2209   llvm::FunctionCallee ExitCallee;
2210   ArrayRef<llvm::Value *> ExitArgs;
2211   bool Conditional;
2212   llvm::BasicBlock *ContBlock = nullptr;
2213 
2214 public:
2215   CommonActionTy(llvm::FunctionCallee EnterCallee,
2216                  ArrayRef<llvm::Value *> EnterArgs,
2217                  llvm::FunctionCallee ExitCallee,
2218                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2219       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2220         ExitArgs(ExitArgs), Conditional(Conditional) {}
2221   void Enter(CodeGenFunction &CGF) override {
2222     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2223     if (Conditional) {
2224       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2225       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2226       ContBlock = CGF.createBasicBlock("omp_if.end");
2227       // Generate the branch (If-stmt)
2228       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2229       CGF.EmitBlock(ThenBlock);
2230     }
2231   }
2232   void Done(CodeGenFunction &CGF) {
2233     // Emit the rest of blocks/branches
2234     CGF.EmitBranch(ContBlock);
2235     CGF.EmitBlock(ContBlock, true);
2236   }
2237   void Exit(CodeGenFunction &CGF) override {
2238     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2239   }
2240 };
2241 } // anonymous namespace
2242 
2243 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2244                                          StringRef CriticalName,
2245                                          const RegionCodeGenTy &CriticalOpGen,
2246                                          SourceLocation Loc, const Expr *Hint) {
2247   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2248   // CriticalOpGen();
2249   // __kmpc_end_critical(ident_t *, gtid, Lock);
2250   // Prepare arguments and build a call to __kmpc_critical
2251   if (!CGF.HaveInsertPoint())
2252     return;
2253   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2254                          getCriticalRegionLock(CriticalName)};
2255   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2256                                                 std::end(Args));
2257   if (Hint) {
2258     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2259         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2260   }
2261   CommonActionTy Action(
2262       OMPBuilder.getOrCreateRuntimeFunction(
2263           CGM.getModule(),
2264           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2265       EnterArgs,
2266       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2267                                             OMPRTL___kmpc_end_critical),
2268       Args);
2269   CriticalOpGen.setAction(Action);
2270   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2271 }
2272 
2273 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2274                                        const RegionCodeGenTy &MasterOpGen,
2275                                        SourceLocation Loc) {
2276   if (!CGF.HaveInsertPoint())
2277     return;
2278   // if(__kmpc_master(ident_t *, gtid)) {
2279   //   MasterOpGen();
2280   //   __kmpc_end_master(ident_t *, gtid);
2281   // }
2282   // Prepare arguments and build a call to __kmpc_master
2283   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2284   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2285                             CGM.getModule(), OMPRTL___kmpc_master),
2286                         Args,
2287                         OMPBuilder.getOrCreateRuntimeFunction(
2288                             CGM.getModule(), OMPRTL___kmpc_end_master),
2289                         Args,
2290                         /*Conditional=*/true);
2291   MasterOpGen.setAction(Action);
2292   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2293   Action.Done(CGF);
2294 }
2295 
2296 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2297                                        const RegionCodeGenTy &MaskedOpGen,
2298                                        SourceLocation Loc, const Expr *Filter) {
2299   if (!CGF.HaveInsertPoint())
2300     return;
2301   // if(__kmpc_masked(ident_t *, gtid, filter)) {
2302   //   MaskedOpGen();
2303   //   __kmpc_end_masked(iden_t *, gtid);
2304   // }
2305   // Prepare arguments and build a call to __kmpc_masked
2306   llvm::Value *FilterVal = Filter
2307                                ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2308                                : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2309   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2310                          FilterVal};
2311   llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2312                             getThreadID(CGF, Loc)};
2313   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2314                             CGM.getModule(), OMPRTL___kmpc_masked),
2315                         Args,
2316                         OMPBuilder.getOrCreateRuntimeFunction(
2317                             CGM.getModule(), OMPRTL___kmpc_end_masked),
2318                         ArgsEnd,
2319                         /*Conditional=*/true);
2320   MaskedOpGen.setAction(Action);
2321   emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2322   Action.Done(CGF);
2323 }
2324 
2325 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2326                                         SourceLocation Loc) {
2327   if (!CGF.HaveInsertPoint())
2328     return;
2329   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2330     OMPBuilder.createTaskyield(CGF.Builder);
2331   } else {
2332     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2333     llvm::Value *Args[] = {
2334         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2335         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2336     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2337                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2338                         Args);
2339   }
2340 
2341   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2342     Region->emitUntiedSwitch(CGF);
2343 }
2344 
2345 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2346                                           const RegionCodeGenTy &TaskgroupOpGen,
2347                                           SourceLocation Loc) {
2348   if (!CGF.HaveInsertPoint())
2349     return;
2350   // __kmpc_taskgroup(ident_t *, gtid);
2351   // TaskgroupOpGen();
2352   // __kmpc_end_taskgroup(ident_t *, gtid);
2353   // Prepare arguments and build a call to __kmpc_taskgroup
2354   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2355   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2356                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2357                         Args,
2358                         OMPBuilder.getOrCreateRuntimeFunction(
2359                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2360                         Args);
2361   TaskgroupOpGen.setAction(Action);
2362   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2363 }
2364 
2365 /// Given an array of pointers to variables, project the address of a
2366 /// given variable.
2367 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2368                                       unsigned Index, const VarDecl *Var) {
2369   // Pull out the pointer to the variable.
2370   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2371   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2372 
2373   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2374   Addr = CGF.Builder.CreateElementBitCast(
2375       Addr, CGF.ConvertTypeForMem(Var->getType()));
2376   return Addr;
2377 }
2378 
2379 static llvm::Value *emitCopyprivateCopyFunction(
2380     CodeGenModule &CGM, llvm::Type *ArgsType,
2381     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2382     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2383     SourceLocation Loc) {
2384   ASTContext &C = CGM.getContext();
2385   // void copy_func(void *LHSArg, void *RHSArg);
2386   FunctionArgList Args;
2387   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2388                            ImplicitParamDecl::Other);
2389   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2390                            ImplicitParamDecl::Other);
2391   Args.push_back(&LHSArg);
2392   Args.push_back(&RHSArg);
2393   const auto &CGFI =
2394       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2395   std::string Name =
2396       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2397   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2398                                     llvm::GlobalValue::InternalLinkage, Name,
2399                                     &CGM.getModule());
2400   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2401   Fn->setDoesNotRecurse();
2402   CodeGenFunction CGF(CGM);
2403   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2404   // Dest = (void*[n])(LHSArg);
2405   // Src = (void*[n])(RHSArg);
2406   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2407       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2408       ArgsType), CGF.getPointerAlign());
2409   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2410       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2411       ArgsType), CGF.getPointerAlign());
2412   // *(Type0*)Dst[0] = *(Type0*)Src[0];
2413   // *(Type1*)Dst[1] = *(Type1*)Src[1];
2414   // ...
2415   // *(Typen*)Dst[n] = *(Typen*)Src[n];
2416   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2417     const auto *DestVar =
2418         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2419     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2420 
2421     const auto *SrcVar =
2422         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2423     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2424 
2425     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2426     QualType Type = VD->getType();
2427     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2428   }
2429   CGF.FinishFunction();
2430   return Fn;
2431 }
2432 
2433 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2434                                        const RegionCodeGenTy &SingleOpGen,
2435                                        SourceLocation Loc,
2436                                        ArrayRef<const Expr *> CopyprivateVars,
2437                                        ArrayRef<const Expr *> SrcExprs,
2438                                        ArrayRef<const Expr *> DstExprs,
2439                                        ArrayRef<const Expr *> AssignmentOps) {
2440   if (!CGF.HaveInsertPoint())
2441     return;
2442   assert(CopyprivateVars.size() == SrcExprs.size() &&
2443          CopyprivateVars.size() == DstExprs.size() &&
2444          CopyprivateVars.size() == AssignmentOps.size());
2445   ASTContext &C = CGM.getContext();
2446   // int32 did_it = 0;
2447   // if(__kmpc_single(ident_t *, gtid)) {
2448   //   SingleOpGen();
2449   //   __kmpc_end_single(ident_t *, gtid);
2450   //   did_it = 1;
2451   // }
2452   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2453   // <copy_func>, did_it);
2454 
2455   Address DidIt = Address::invalid();
2456   if (!CopyprivateVars.empty()) {
2457     // int32 did_it = 0;
2458     QualType KmpInt32Ty =
2459         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2460     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2461     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2462   }
2463   // Prepare arguments and build a call to __kmpc_single
2464   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2465   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2466                             CGM.getModule(), OMPRTL___kmpc_single),
2467                         Args,
2468                         OMPBuilder.getOrCreateRuntimeFunction(
2469                             CGM.getModule(), OMPRTL___kmpc_end_single),
2470                         Args,
2471                         /*Conditional=*/true);
2472   SingleOpGen.setAction(Action);
2473   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2474   if (DidIt.isValid()) {
2475     // did_it = 1;
2476     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2477   }
2478   Action.Done(CGF);
2479   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2480   // <copy_func>, did_it);
2481   if (DidIt.isValid()) {
2482     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2483     QualType CopyprivateArrayTy = C.getConstantArrayType(
2484         C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2485         /*IndexTypeQuals=*/0);
2486     // Create a list of all private variables for copyprivate.
2487     Address CopyprivateList =
2488         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2489     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2490       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2491       CGF.Builder.CreateStore(
2492           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2493               CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2494               CGF.VoidPtrTy),
2495           Elem);
2496     }
2497     // Build function that copies private values from single region to all other
2498     // threads in the corresponding parallel region.
2499     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2500         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
2501         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
2502     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2503     Address CL =
2504       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
2505                                                       CGF.VoidPtrTy);
2506     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2507     llvm::Value *Args[] = {
2508         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2509         getThreadID(CGF, Loc),        // i32 <gtid>
2510         BufSize,                      // size_t <buf_size>
2511         CL.getPointer(),              // void *<copyprivate list>
2512         CpyFn,                        // void (*) (void *, void *) <copy_func>
2513         DidItVal                      // i32 did_it
2514     };
2515     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2516                             CGM.getModule(), OMPRTL___kmpc_copyprivate),
2517                         Args);
2518   }
2519 }
2520 
2521 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2522                                         const RegionCodeGenTy &OrderedOpGen,
2523                                         SourceLocation Loc, bool IsThreads) {
2524   if (!CGF.HaveInsertPoint())
2525     return;
2526   // __kmpc_ordered(ident_t *, gtid);
2527   // OrderedOpGen();
2528   // __kmpc_end_ordered(ident_t *, gtid);
2529   // Prepare arguments and build a call to __kmpc_ordered
2530   if (IsThreads) {
2531     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2532     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2533                               CGM.getModule(), OMPRTL___kmpc_ordered),
2534                           Args,
2535                           OMPBuilder.getOrCreateRuntimeFunction(
2536                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2537                           Args);
2538     OrderedOpGen.setAction(Action);
2539     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2540     return;
2541   }
2542   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2543 }
2544 
2545 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2546   unsigned Flags;
2547   if (Kind == OMPD_for)
2548     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2549   else if (Kind == OMPD_sections)
2550     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2551   else if (Kind == OMPD_single)
2552     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2553   else if (Kind == OMPD_barrier)
2554     Flags = OMP_IDENT_BARRIER_EXPL;
2555   else
2556     Flags = OMP_IDENT_BARRIER_IMPL;
2557   return Flags;
2558 }
2559 
2560 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2561     CodeGenFunction &CGF, const OMPLoopDirective &S,
2562     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2563   // Check if the loop directive is actually a doacross loop directive. In this
2564   // case choose static, 1 schedule.
2565   if (llvm::any_of(
2566           S.getClausesOfKind<OMPOrderedClause>(),
2567           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2568     ScheduleKind = OMPC_SCHEDULE_static;
2569     // Chunk size is 1 in this case.
2570     llvm::APInt ChunkSize(32, 1);
2571     ChunkExpr = IntegerLiteral::Create(
2572         CGF.getContext(), ChunkSize,
2573         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2574         SourceLocation());
2575   }
2576 }
2577 
2578 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2579                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2580                                       bool ForceSimpleCall) {
2581   // Check if we should use the OMPBuilder
2582   auto *OMPRegionInfo =
2583       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2584   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2585     CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2586         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2587     return;
2588   }
2589 
2590   if (!CGF.HaveInsertPoint())
2591     return;
2592   // Build call __kmpc_cancel_barrier(loc, thread_id);
2593   // Build call __kmpc_barrier(loc, thread_id);
2594   unsigned Flags = getDefaultFlagsForBarriers(Kind);
2595   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2596   // thread_id);
2597   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2598                          getThreadID(CGF, Loc)};
2599   if (OMPRegionInfo) {
2600     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2601       llvm::Value *Result = CGF.EmitRuntimeCall(
2602           OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2603                                                 OMPRTL___kmpc_cancel_barrier),
2604           Args);
2605       if (EmitChecks) {
2606         // if (__kmpc_cancel_barrier()) {
2607         //   exit from construct;
2608         // }
2609         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2610         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2611         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2612         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2613         CGF.EmitBlock(ExitBB);
2614         //   exit from construct;
2615         CodeGenFunction::JumpDest CancelDestination =
2616             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2617         CGF.EmitBranchThroughCleanup(CancelDestination);
2618         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2619       }
2620       return;
2621     }
2622   }
2623   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2624                           CGM.getModule(), OMPRTL___kmpc_barrier),
2625                       Args);
2626 }
2627 
2628 /// Map the OpenMP loop schedule to the runtime enumeration.
2629 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2630                                           bool Chunked, bool Ordered) {
2631   switch (ScheduleKind) {
2632   case OMPC_SCHEDULE_static:
2633     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2634                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2635   case OMPC_SCHEDULE_dynamic:
2636     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2637   case OMPC_SCHEDULE_guided:
2638     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2639   case OMPC_SCHEDULE_runtime:
2640     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2641   case OMPC_SCHEDULE_auto:
2642     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2643   case OMPC_SCHEDULE_unknown:
2644     assert(!Chunked && "chunk was specified but schedule kind not known");
2645     return Ordered ? OMP_ord_static : OMP_sch_static;
2646   }
2647   llvm_unreachable("Unexpected runtime schedule");
2648 }
2649 
2650 /// Map the OpenMP distribute schedule to the runtime enumeration.
2651 static OpenMPSchedType
2652 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2653   // only static is allowed for dist_schedule
2654   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2655 }
2656 
2657 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2658                                          bool Chunked) const {
2659   OpenMPSchedType Schedule =
2660       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2661   return Schedule == OMP_sch_static;
2662 }
2663 
2664 bool CGOpenMPRuntime::isStaticNonchunked(
2665     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2666   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2667   return Schedule == OMP_dist_sch_static;
2668 }
2669 
2670 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2671                                       bool Chunked) const {
2672   OpenMPSchedType Schedule =
2673       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2674   return Schedule == OMP_sch_static_chunked;
2675 }
2676 
2677 bool CGOpenMPRuntime::isStaticChunked(
2678     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2679   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2680   return Schedule == OMP_dist_sch_static_chunked;
2681 }
2682 
2683 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2684   OpenMPSchedType Schedule =
2685       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2686   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2687   return Schedule != OMP_sch_static;
2688 }
2689 
2690 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2691                                   OpenMPScheduleClauseModifier M1,
2692                                   OpenMPScheduleClauseModifier M2) {
2693   int Modifier = 0;
2694   switch (M1) {
2695   case OMPC_SCHEDULE_MODIFIER_monotonic:
2696     Modifier = OMP_sch_modifier_monotonic;
2697     break;
2698   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2699     Modifier = OMP_sch_modifier_nonmonotonic;
2700     break;
2701   case OMPC_SCHEDULE_MODIFIER_simd:
2702     if (Schedule == OMP_sch_static_chunked)
2703       Schedule = OMP_sch_static_balanced_chunked;
2704     break;
2705   case OMPC_SCHEDULE_MODIFIER_last:
2706   case OMPC_SCHEDULE_MODIFIER_unknown:
2707     break;
2708   }
2709   switch (M2) {
2710   case OMPC_SCHEDULE_MODIFIER_monotonic:
2711     Modifier = OMP_sch_modifier_monotonic;
2712     break;
2713   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2714     Modifier = OMP_sch_modifier_nonmonotonic;
2715     break;
2716   case OMPC_SCHEDULE_MODIFIER_simd:
2717     if (Schedule == OMP_sch_static_chunked)
2718       Schedule = OMP_sch_static_balanced_chunked;
2719     break;
2720   case OMPC_SCHEDULE_MODIFIER_last:
2721   case OMPC_SCHEDULE_MODIFIER_unknown:
2722     break;
2723   }
2724   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2725   // If the static schedule kind is specified or if the ordered clause is
2726   // specified, and if the nonmonotonic modifier is not specified, the effect is
2727   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2728   // modifier is specified, the effect is as if the nonmonotonic modifier is
2729   // specified.
2730   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2731     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2732           Schedule == OMP_sch_static_balanced_chunked ||
2733           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2734           Schedule == OMP_dist_sch_static_chunked ||
2735           Schedule == OMP_dist_sch_static))
2736       Modifier = OMP_sch_modifier_nonmonotonic;
2737   }
2738   return Schedule | Modifier;
2739 }
2740 
2741 void CGOpenMPRuntime::emitForDispatchInit(
2742     CodeGenFunction &CGF, SourceLocation Loc,
2743     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2744     bool Ordered, const DispatchRTInput &DispatchValues) {
2745   if (!CGF.HaveInsertPoint())
2746     return;
2747   OpenMPSchedType Schedule = getRuntimeSchedule(
2748       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2749   assert(Ordered ||
2750          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2751           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2752           Schedule != OMP_sch_static_balanced_chunked));
2753   // Call __kmpc_dispatch_init(
2754   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2755   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2756   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2757 
2758   // If the Chunk was not specified in the clause - use default value 1.
2759   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2760                                             : CGF.Builder.getIntN(IVSize, 1);
2761   llvm::Value *Args[] = {
2762       emitUpdateLocation(CGF, Loc),
2763       getThreadID(CGF, Loc),
2764       CGF.Builder.getInt32(addMonoNonMonoModifier(
2765           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2766       DispatchValues.LB,                                     // Lower
2767       DispatchValues.UB,                                     // Upper
2768       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2769       Chunk                                                  // Chunk
2770   };
2771   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2772 }
2773 
2774 static void emitForStaticInitCall(
2775     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2776     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2777     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2778     const CGOpenMPRuntime::StaticRTInput &Values) {
2779   if (!CGF.HaveInsertPoint())
2780     return;
2781 
2782   assert(!Values.Ordered);
2783   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2784          Schedule == OMP_sch_static_balanced_chunked ||
2785          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2786          Schedule == OMP_dist_sch_static ||
2787          Schedule == OMP_dist_sch_static_chunked);
2788 
2789   // Call __kmpc_for_static_init(
2790   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2791   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2792   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2793   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2794   llvm::Value *Chunk = Values.Chunk;
2795   if (Chunk == nullptr) {
2796     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2797             Schedule == OMP_dist_sch_static) &&
2798            "expected static non-chunked schedule");
2799     // If the Chunk was not specified in the clause - use default value 1.
2800     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2801   } else {
2802     assert((Schedule == OMP_sch_static_chunked ||
2803             Schedule == OMP_sch_static_balanced_chunked ||
2804             Schedule == OMP_ord_static_chunked ||
2805             Schedule == OMP_dist_sch_static_chunked) &&
2806            "expected static chunked schedule");
2807   }
2808   llvm::Value *Args[] = {
2809       UpdateLocation,
2810       ThreadId,
2811       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2812                                                   M2)), // Schedule type
2813       Values.IL.getPointer(),                           // &isLastIter
2814       Values.LB.getPointer(),                           // &LB
2815       Values.UB.getPointer(),                           // &UB
2816       Values.ST.getPointer(),                           // &Stride
2817       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2818       Chunk                                             // Chunk
2819   };
2820   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2821 }
2822 
2823 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2824                                         SourceLocation Loc,
2825                                         OpenMPDirectiveKind DKind,
2826                                         const OpenMPScheduleTy &ScheduleKind,
2827                                         const StaticRTInput &Values) {
2828   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2829       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2830   assert(isOpenMPWorksharingDirective(DKind) &&
2831          "Expected loop-based or sections-based directive.");
2832   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2833                                              isOpenMPLoopDirective(DKind)
2834                                                  ? OMP_IDENT_WORK_LOOP
2835                                                  : OMP_IDENT_WORK_SECTIONS);
2836   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2837   llvm::FunctionCallee StaticInitFunction =
2838       createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
2839   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2840   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2841                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2842 }
2843 
2844 void CGOpenMPRuntime::emitDistributeStaticInit(
2845     CodeGenFunction &CGF, SourceLocation Loc,
2846     OpenMPDistScheduleClauseKind SchedKind,
2847     const CGOpenMPRuntime::StaticRTInput &Values) {
2848   OpenMPSchedType ScheduleNum =
2849       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2850   llvm::Value *UpdatedLocation =
2851       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2852   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2853   llvm::FunctionCallee StaticInitFunction;
2854   bool isGPUDistribute =
2855       CGM.getLangOpts().OpenMPIsDevice &&
2856       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2857   StaticInitFunction = createForStaticInitFunction(
2858       Values.IVSize, Values.IVSigned, isGPUDistribute);
2859 
2860   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2861                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2862                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2863 }
2864 
2865 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2866                                           SourceLocation Loc,
2867                                           OpenMPDirectiveKind DKind) {
2868   if (!CGF.HaveInsertPoint())
2869     return;
2870   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2871   llvm::Value *Args[] = {
2872       emitUpdateLocation(CGF, Loc,
2873                          isOpenMPDistributeDirective(DKind)
2874                              ? OMP_IDENT_WORK_DISTRIBUTE
2875                              : isOpenMPLoopDirective(DKind)
2876                                    ? OMP_IDENT_WORK_LOOP
2877                                    : OMP_IDENT_WORK_SECTIONS),
2878       getThreadID(CGF, Loc)};
2879   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2880   if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2881       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2882     CGF.EmitRuntimeCall(
2883         OMPBuilder.getOrCreateRuntimeFunction(
2884             CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2885         Args);
2886   else
2887     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2888                             CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2889                         Args);
2890 }
2891 
2892 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2893                                                  SourceLocation Loc,
2894                                                  unsigned IVSize,
2895                                                  bool IVSigned) {
2896   if (!CGF.HaveInsertPoint())
2897     return;
2898   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2899   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2900   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2901 }
2902 
2903 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2904                                           SourceLocation Loc, unsigned IVSize,
2905                                           bool IVSigned, Address IL,
2906                                           Address LB, Address UB,
2907                                           Address ST) {
2908   // Call __kmpc_dispatch_next(
2909   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2910   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2911   //          kmp_int[32|64] *p_stride);
2912   llvm::Value *Args[] = {
2913       emitUpdateLocation(CGF, Loc),
2914       getThreadID(CGF, Loc),
2915       IL.getPointer(), // &isLastIter
2916       LB.getPointer(), // &Lower
2917       UB.getPointer(), // &Upper
2918       ST.getPointer()  // &Stride
2919   };
2920   llvm::Value *Call =
2921       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2922   return CGF.EmitScalarConversion(
2923       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2924       CGF.getContext().BoolTy, Loc);
2925 }
2926 
2927 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2928                                            llvm::Value *NumThreads,
2929                                            SourceLocation Loc) {
2930   if (!CGF.HaveInsertPoint())
2931     return;
2932   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2933   llvm::Value *Args[] = {
2934       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2935       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2936   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2937                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2938                       Args);
2939 }
2940 
2941 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2942                                          ProcBindKind ProcBind,
2943                                          SourceLocation Loc) {
2944   if (!CGF.HaveInsertPoint())
2945     return;
2946   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2947   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2948   llvm::Value *Args[] = {
2949       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2950       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2951   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2952                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2953                       Args);
2954 }
2955 
2956 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2957                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2958   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2959     OMPBuilder.createFlush(CGF.Builder);
2960   } else {
2961     if (!CGF.HaveInsertPoint())
2962       return;
2963     // Build call void __kmpc_flush(ident_t *loc)
2964     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2965                             CGM.getModule(), OMPRTL___kmpc_flush),
2966                         emitUpdateLocation(CGF, Loc));
2967   }
2968 }
2969 
namespace {
/// Indexes of fields for type kmp_task_t.
///
/// The enumerators take their implicit values (0, 1, 2, ...) from declaration
/// order. NOTE(review): presumably this order has to mirror the field order of
/// the kmp_task_t record built elsewhere in this file -- confirm before
/// reordering or inserting entries.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2995 
2996 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2997   return OffloadEntriesTargetRegion.empty() &&
2998          OffloadEntriesDeviceGlobalVar.empty();
2999 }
3000 
3001 /// Initialize target region entry.
3002 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3003     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3004                                     StringRef ParentName, unsigned LineNum,
3005                                     unsigned Order) {
3006   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3007                                              "only required for the device "
3008                                              "code generation.");
3009   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3010       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3011                                    OMPTargetRegionEntryTargetRegion);
3012   ++OffloadingEntriesNum;
3013 }
3014 
3015 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3016     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3017                                   StringRef ParentName, unsigned LineNum,
3018                                   llvm::Constant *Addr, llvm::Constant *ID,
3019                                   OMPTargetRegionEntryKind Flags) {
3020   // If we are emitting code for a target, the entry is already initialized,
3021   // only has to be registered.
3022   if (CGM.getLangOpts().OpenMPIsDevice) {
3023     // This could happen if the device compilation is invoked standalone.
3024     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
3025       return;
3026     auto &Entry =
3027         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3028     Entry.setAddress(Addr);
3029     Entry.setID(ID);
3030     Entry.setFlags(Flags);
3031   } else {
3032     if (Flags ==
3033             OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
3034         hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
3035                                  /*IgnoreAddressId*/ true))
3036       return;
3037     assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
3038            "Target region entry already registered!");
3039     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3040     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3041     ++OffloadingEntriesNum;
3042   }
3043 }
3044 
3045 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3046     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3047     bool IgnoreAddressId) const {
3048   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3049   if (PerDevice == OffloadEntriesTargetRegion.end())
3050     return false;
3051   auto PerFile = PerDevice->second.find(FileID);
3052   if (PerFile == PerDevice->second.end())
3053     return false;
3054   auto PerParentName = PerFile->second.find(ParentName);
3055   if (PerParentName == PerFile->second.end())
3056     return false;
3057   auto PerLine = PerParentName->second.find(LineNum);
3058   if (PerLine == PerParentName->second.end())
3059     return false;
3060   // Fail if this entry is already registered.
3061   if (!IgnoreAddressId &&
3062       (PerLine->second.getAddress() || PerLine->second.getID()))
3063     return false;
3064   return true;
3065 }
3066 
3067 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3068     const OffloadTargetRegionEntryInfoActTy &Action) {
3069   // Scan all target region entries and perform the provided action.
3070   for (const auto &D : OffloadEntriesTargetRegion)
3071     for (const auto &F : D.second)
3072       for (const auto &P : F.second)
3073         for (const auto &L : P.second)
3074           Action(D.first, F.first, P.first(), L.first, L.second);
3075 }
3076 
3077 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3078     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3079                                        OMPTargetGlobalVarEntryKind Flags,
3080                                        unsigned Order) {
3081   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3082                                              "only required for the device "
3083                                              "code generation.");
3084   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3085   ++OffloadingEntriesNum;
3086 }
3087 
3088 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3089     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3090                                      CharUnits VarSize,
3091                                      OMPTargetGlobalVarEntryKind Flags,
3092                                      llvm::GlobalValue::LinkageTypes Linkage) {
3093   if (CGM.getLangOpts().OpenMPIsDevice) {
3094     // This could happen if the device compilation is invoked standalone.
3095     if (!hasDeviceGlobalVarEntryInfo(VarName))
3096       return;
3097     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3098     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3099       if (Entry.getVarSize().isZero()) {
3100         Entry.setVarSize(VarSize);
3101         Entry.setLinkage(Linkage);
3102       }
3103       return;
3104     }
3105     Entry.setVarSize(VarSize);
3106     Entry.setLinkage(Linkage);
3107     Entry.setAddress(Addr);
3108   } else {
3109     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3110       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3111       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3112              "Entry not initialized!");
3113       if (Entry.getVarSize().isZero()) {
3114         Entry.setVarSize(VarSize);
3115         Entry.setLinkage(Linkage);
3116       }
3117       return;
3118     }
3119     OffloadEntriesDeviceGlobalVar.try_emplace(
3120         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3121     ++OffloadingEntriesNum;
3122   }
3123 }
3124 
3125 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3126     actOnDeviceGlobalVarEntriesInfo(
3127         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3128   // Scan all target region entries and perform the provided action.
3129   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3130     Action(E.getKey(), E.getValue());
3131 }
3132 
3133 void CGOpenMPRuntime::createOffloadEntry(
3134     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3135     llvm::GlobalValue::LinkageTypes Linkage) {
3136   StringRef Name = Addr->getName();
3137   llvm::Module &M = CGM.getModule();
3138   llvm::LLVMContext &C = M.getContext();
3139 
3140   // Create constant string with the name.
3141   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3142 
3143   std::string StringName = getName({"omp_offloading", "entry_name"});
3144   auto *Str = new llvm::GlobalVariable(
3145       M, StrPtrInit->getType(), /*isConstant=*/true,
3146       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3147   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3148 
3149   llvm::Constant *Data[] = {
3150       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3151       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3152       llvm::ConstantInt::get(CGM.SizeTy, Size),
3153       llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3154       llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3155   std::string EntryName = getName({"omp_offloading", "entry", ""});
3156   llvm::GlobalVariable *Entry = createGlobalStruct(
3157       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3158       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3159 
3160   // The entry has to be created in the section the linker expects it to be.
3161   Entry->setSection("omp_offloading_entries");
3162 }
3163 
/// Emits the "omp_offload.info" named metadata describing every recorded
/// offload entry and then creates the offload entry globals in creation
/// order. Does nothing in simd-only mode or when no entries were recorded.
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries indexed by their creation order; each slot holds the entry, a
  // source location for diagnostics, and a name (parent function or variable).
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  // Parent function name of each target region entry, indexed by order.
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Find the file with a matching unique ID to build a source location
        // for diagnostics. Note that the loop-local 'E' below shadows the
        // lambda parameter 'E' for the duration of the loop only.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Create the offload entry globals in creation order, diagnosing entries
  // that are incomplete.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // No entry is created on the device when unified shared memory is
        // required.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // The address is expected to be unset on the device and set on the
        // host.
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      // For global variables the address doubles as the entry ID.
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3337 
3338 /// Loads all the offload entries information from the host IR
3339 /// metadata.
3340 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3341   // If we are in target mode, load the metadata from the host IR. This code has
3342   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
3343 
3344   if (!CGM.getLangOpts().OpenMPIsDevice)
3345     return;
3346 
3347   if (CGM.getLangOpts().OMPHostIRFile.empty())
3348     return;
3349 
3350   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3351   if (auto EC = Buf.getError()) {
3352     CGM.getDiags().Report(diag::err_cannot_open_file)
3353         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3354     return;
3355   }
3356 
3357   llvm::LLVMContext C;
3358   auto ME = expectedToErrorOrAndEmitErrors(
3359       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3360 
3361   if (auto EC = ME.getError()) {
3362     unsigned DiagID = CGM.getDiags().getCustomDiagID(
3363         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3364     CGM.getDiags().Report(DiagID)
3365         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3366     return;
3367   }
3368 
3369   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3370   if (!MD)
3371     return;
3372 
3373   for (llvm::MDNode *MN : MD->operands()) {
3374     auto &&GetMDInt = [MN](unsigned Idx) {
3375       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3376       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3377     };
3378 
3379     auto &&GetMDString = [MN](unsigned Idx) {
3380       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3381       return V->getString();
3382     };
3383 
3384     switch (GetMDInt(0)) {
3385     default:
3386       llvm_unreachable("Unexpected metadata!");
3387       break;
3388     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3389         OffloadingEntryInfoTargetRegion:
3390       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3391           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3392           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3393           /*Order=*/GetMDInt(5));
3394       break;
3395     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3396         OffloadingEntryInfoDeviceGlobalVar:
3397       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3398           /*MangledName=*/GetMDString(1),
3399           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3400               /*Flags=*/GetMDInt(2)),
3401           /*Order=*/GetMDInt(3));
3402       break;
3403     }
3404   }
3405 }
3406 
3407 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3408   if (!KmpRoutineEntryPtrTy) {
3409     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3410     ASTContext &C = CGM.getContext();
3411     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3412     FunctionProtoType::ExtProtoInfo EPI;
3413     KmpRoutineEntryPtrQTy = C.getPointerType(
3414         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3415     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3416   }
3417 }
3418 
3419 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3420   // Make sure the type of the entry is already created. This is the type we
3421   // have to create:
3422   // struct __tgt_offload_entry{
3423   //   void      *addr;       // Pointer to the offload entry info.
3424   //                          // (function or global)
3425   //   char      *name;       // Name of the function or global.
3426   //   size_t     size;       // Size of the entry info (0 if it a function).
3427   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3428   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3429   // };
3430   if (TgtOffloadEntryQTy.isNull()) {
3431     ASTContext &C = CGM.getContext();
3432     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3433     RD->startDefinition();
3434     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3435     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3436     addFieldToRecordDecl(C, RD, C.getSizeType());
3437     addFieldToRecordDecl(
3438         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3439     addFieldToRecordDecl(
3440         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3441     RD->completeDefinition();
3442     RD->addAttr(PackedAttr::CreateImplicit(C));
3443     TgtOffloadEntryQTy = C.getRecordType(RD);
3444   }
3445   return TgtOffloadEntryQTy;
3446 }
3447 
3448 namespace {
3449 struct PrivateHelpersTy {
3450   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3451                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3452       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3453         PrivateElemInit(PrivateElemInit) {}
3454   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3455   const Expr *OriginalRef = nullptr;
3456   const VarDecl *Original = nullptr;
3457   const VarDecl *PrivateCopy = nullptr;
3458   const VarDecl *PrivateElemInit = nullptr;
3459   bool isLocalPrivate() const {
3460     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3461   }
3462 };
3463 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3464 } // anonymous namespace
3465 
3466 static bool isAllocatableDecl(const VarDecl *VD) {
3467   const VarDecl *CVD = VD->getCanonicalDecl();
3468   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3469     return false;
3470   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3471   // Use the default allocation.
3472   return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
3473             AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
3474            !AA->getAllocator());
3475 }
3476 
3477 static RecordDecl *
3478 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3479   if (!Privates.empty()) {
3480     ASTContext &C = CGM.getContext();
3481     // Build struct .kmp_privates_t. {
3482     //         /*  private vars  */
3483     //       };
3484     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3485     RD->startDefinition();
3486     for (const auto &Pair : Privates) {
3487       const VarDecl *VD = Pair.second.Original;
3488       QualType Type = VD->getType().getNonReferenceType();
3489       // If the private variable is a local variable with lvalue ref type,
3490       // allocate the pointer instead of the pointee type.
3491       if (Pair.second.isLocalPrivate()) {
3492         if (VD->getType()->isLValueReferenceType())
3493           Type = C.getPointerType(Type);
3494         if (isAllocatableDecl(VD))
3495           Type = C.getPointerType(Type);
3496       }
3497       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3498       if (VD->hasAttrs()) {
3499         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3500              E(VD->getAttrs().end());
3501              I != E; ++I)
3502           FD->addAttr(*I);
3503       }
3504     }
3505     RD->completeDefinition();
3506     return RD;
3507   }
3508   return nullptr;
3509 }
3510 
3511 static RecordDecl *
3512 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3513                          QualType KmpInt32Ty,
3514                          QualType KmpRoutineEntryPointerQTy) {
3515   ASTContext &C = CGM.getContext();
3516   // Build struct kmp_task_t {
3517   //         void *              shareds;
3518   //         kmp_routine_entry_t routine;
3519   //         kmp_int32           part_id;
3520   //         kmp_cmplrdata_t data1;
3521   //         kmp_cmplrdata_t data2;
3522   // For taskloops additional fields:
3523   //         kmp_uint64          lb;
3524   //         kmp_uint64          ub;
3525   //         kmp_int64           st;
3526   //         kmp_int32           liter;
3527   //         void *              reductions;
3528   //       };
3529   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3530   UD->startDefinition();
3531   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3532   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3533   UD->completeDefinition();
3534   QualType KmpCmplrdataTy = C.getRecordType(UD);
3535   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3536   RD->startDefinition();
3537   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3538   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3539   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3540   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3541   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3542   if (isOpenMPTaskLoopDirective(Kind)) {
3543     QualType KmpUInt64Ty =
3544         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3545     QualType KmpInt64Ty =
3546         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3547     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3548     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3549     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3550     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3551     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3552   }
3553   RD->completeDefinition();
3554   return RD;
3555 }
3556 
3557 static RecordDecl *
3558 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3559                                      ArrayRef<PrivateDataTy> Privates) {
3560   ASTContext &C = CGM.getContext();
3561   // Build struct kmp_task_t_with_privates {
3562   //         kmp_task_t task_data;
3563   //         .kmp_privates_t. privates;
3564   //       };
3565   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3566   RD->startDefinition();
3567   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3568   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3569     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3570   RD->completeDefinition();
3571   return RD;
3572 }
3573 
3574 /// Emit a proxy function which accepts kmp_task_t as the second
3575 /// argument.
3576 /// \code
3577 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3578 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3579 ///   For taskloops:
3580 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3581 ///   tt->reductions, tt->shareds);
3582 ///   return 0;
3583 /// }
3584 /// \endcode
3585 static llvm::Function *
3586 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3587                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3588                       QualType KmpTaskTWithPrivatesPtrQTy,
3589                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3590                       QualType SharedsPtrTy, llvm::Function *TaskFunction,
3591                       llvm::Value *TaskPrivatesMap) {
3592   ASTContext &C = CGM.getContext();
3593   FunctionArgList Args;
3594   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3595                             ImplicitParamDecl::Other);
3596   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3597                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3598                                 ImplicitParamDecl::Other);
3599   Args.push_back(&GtidArg);
3600   Args.push_back(&TaskTypeArg);
3601   const auto &TaskEntryFnInfo =
3602       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3603   llvm::FunctionType *TaskEntryTy =
3604       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3605   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3606   auto *TaskEntry = llvm::Function::Create(
3607       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3608   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3609   TaskEntry->setDoesNotRecurse();
3610   CodeGenFunction CGF(CGM);
3611   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3612                     Loc, Loc);
3613 
3614   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3615   // tt,
3616   // For taskloops:
3617   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3618   // tt->task_data.shareds);
3619   llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3620       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3621   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3622       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3623       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3624   const auto *KmpTaskTWithPrivatesQTyRD =
3625       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3626   LValue Base =
3627       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3628   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3629   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3630   LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3631   llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3632 
3633   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3634   LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3635   llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3636       CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3637       CGF.ConvertTypeForMem(SharedsPtrTy));
3638 
3639   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3640   llvm::Value *PrivatesParam;
3641   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3642     LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3643     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3644         PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3645   } else {
3646     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3647   }
3648 
3649   llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
3650                                TaskPrivatesMap,
3651                                CGF.Builder
3652                                    .CreatePointerBitCastOrAddrSpaceCast(
3653                                        TDBase.getAddress(CGF), CGF.VoidPtrTy)
3654                                    .getPointer()};
3655   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3656                                           std::end(CommonArgs));
3657   if (isOpenMPTaskLoopDirective(Kind)) {
3658     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3659     LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3660     llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3661     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3662     LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3663     llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3664     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3665     LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3666     llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3667     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3668     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3669     llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3670     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3671     LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3672     llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3673     CallArgs.push_back(LBParam);
3674     CallArgs.push_back(UBParam);
3675     CallArgs.push_back(StParam);
3676     CallArgs.push_back(LIParam);
3677     CallArgs.push_back(RParam);
3678   }
3679   CallArgs.push_back(SharedsParam);
3680 
3681   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3682                                                   CallArgs);
3683   CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3684                              CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3685   CGF.FinishFunction();
3686   return TaskEntry;
3687 }
3688 
3689 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3690                                             SourceLocation Loc,
3691                                             QualType KmpInt32Ty,
3692                                             QualType KmpTaskTWithPrivatesPtrQTy,
3693                                             QualType KmpTaskTWithPrivatesQTy) {
3694   ASTContext &C = CGM.getContext();
3695   FunctionArgList Args;
3696   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3697                             ImplicitParamDecl::Other);
3698   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3699                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3700                                 ImplicitParamDecl::Other);
3701   Args.push_back(&GtidArg);
3702   Args.push_back(&TaskTypeArg);
3703   const auto &DestructorFnInfo =
3704       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3705   llvm::FunctionType *DestructorFnTy =
3706       CGM.getTypes().GetFunctionType(DestructorFnInfo);
3707   std::string Name =
3708       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3709   auto *DestructorFn =
3710       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3711                              Name, &CGM.getModule());
3712   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3713                                     DestructorFnInfo);
3714   DestructorFn->setDoesNotRecurse();
3715   CodeGenFunction CGF(CGM);
3716   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3717                     Args, Loc, Loc);
3718 
3719   LValue Base = CGF.EmitLoadOfPointerLValue(
3720       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3721       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3722   const auto *KmpTaskTWithPrivatesQTyRD =
3723       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3724   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3725   Base = CGF.EmitLValueForField(Base, *FI);
3726   for (const auto *Field :
3727        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3728     if (QualType::DestructionKind DtorKind =
3729             Field->getType().isDestructedType()) {
3730       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3731       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3732     }
3733   }
3734   CGF.FinishFunction();
3735   return DestructorFn;
3736 }
3737 
3738 /// Emit a privates mapping function for correct handling of private and
3739 /// firstprivate variables.
3740 /// \code
3741 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3742 /// **noalias priv1,...,  <tyn> **noalias privn) {
3743 ///   *priv1 = &.privates.priv1;
3744 ///   ...;
3745 ///   *privn = &.privates.privn;
3746 /// }
3747 /// \endcode
3748 static llvm::Value *
3749 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3750                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3751                                ArrayRef<PrivateDataTy> Privates) {
3752   ASTContext &C = CGM.getContext();
3753   FunctionArgList Args;
3754   ImplicitParamDecl TaskPrivatesArg(
3755       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3756       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3757       ImplicitParamDecl::Other);
3758   Args.push_back(&TaskPrivatesArg);
3759   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3760   unsigned Counter = 1;
3761   for (const Expr *E : Data.PrivateVars) {
3762     Args.push_back(ImplicitParamDecl::Create(
3763         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3764         C.getPointerType(C.getPointerType(E->getType()))
3765             .withConst()
3766             .withRestrict(),
3767         ImplicitParamDecl::Other));
3768     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3769     PrivateVarsPos[VD] = Counter;
3770     ++Counter;
3771   }
3772   for (const Expr *E : Data.FirstprivateVars) {
3773     Args.push_back(ImplicitParamDecl::Create(
3774         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3775         C.getPointerType(C.getPointerType(E->getType()))
3776             .withConst()
3777             .withRestrict(),
3778         ImplicitParamDecl::Other));
3779     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3780     PrivateVarsPos[VD] = Counter;
3781     ++Counter;
3782   }
3783   for (const Expr *E : Data.LastprivateVars) {
3784     Args.push_back(ImplicitParamDecl::Create(
3785         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3786         C.getPointerType(C.getPointerType(E->getType()))
3787             .withConst()
3788             .withRestrict(),
3789         ImplicitParamDecl::Other));
3790     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3791     PrivateVarsPos[VD] = Counter;
3792     ++Counter;
3793   }
3794   for (const VarDecl *VD : Data.PrivateLocals) {
3795     QualType Ty = VD->getType().getNonReferenceType();
3796     if (VD->getType()->isLValueReferenceType())
3797       Ty = C.getPointerType(Ty);
3798     if (isAllocatableDecl(VD))
3799       Ty = C.getPointerType(Ty);
3800     Args.push_back(ImplicitParamDecl::Create(
3801         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3802         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3803         ImplicitParamDecl::Other));
3804     PrivateVarsPos[VD] = Counter;
3805     ++Counter;
3806   }
3807   const auto &TaskPrivatesMapFnInfo =
3808       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3809   llvm::FunctionType *TaskPrivatesMapTy =
3810       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3811   std::string Name =
3812       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3813   auto *TaskPrivatesMap = llvm::Function::Create(
3814       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3815       &CGM.getModule());
3816   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3817                                     TaskPrivatesMapFnInfo);
3818   if (CGM.getLangOpts().Optimize) {
3819     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3820     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3821     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3822   }
3823   CodeGenFunction CGF(CGM);
3824   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3825                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3826 
3827   // *privi = &.privates.privi;
3828   LValue Base = CGF.EmitLoadOfPointerLValue(
3829       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3830       TaskPrivatesArg.getType()->castAs<PointerType>());
3831   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3832   Counter = 0;
3833   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3834     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3835     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3836     LValue RefLVal =
3837         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3838     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3839         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3840     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3841     ++Counter;
3842   }
3843   CGF.FinishFunction();
3844   return TaskPrivatesMap;
3845 }
3846 
/// Emit initialization for private variables in task-based directives.
///
/// Walks \p Privates in lockstep with the fields of the privates record (the
/// second field of \p KmpTaskTWithPrivatesQTyRD) and emits the initializer of
/// each private copy into its field.
///
/// \param CGF Function the initialization code is emitted into.
/// \param D Directive whose captured statement provides the capture info.
/// \param KmpTaskSharedsPtr Address of the shareds block; only used to build
/// the source lvalue under the condition checked below.
/// \param TDBase LValue of the task record that owns the privates.
/// \param KmpTaskTWithPrivatesQTyRD Record declaration of the task record.
/// \param SharedsTy Type of the shareds block.
/// \param SharedsPtrTy Pointer type \p KmpTaskSharedsPtr is cast to.
/// \param Data Task data; only \c FirstprivateVars is consulted here.
/// \param Privates Descriptions of the privates, one per field.
/// \param ForDup If true, only non-trivial constructor initializers are
/// emitted and shared values are read through the shareds block.
/// emitTaskDupFunction() passes true.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of the task record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  // Pick the captured statement of the taskloop or of the task region.
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  // Stays default-constructed unless the condition below holds.
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // From here on FI iterates over the fields of the privates record; it is
  // advanced once per entry of Privates, including the skipped ones.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // Without ForDup every initializer is emitted; with ForDup only
    // non-trivial constructor calls are.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // A PrivateElemInit variable means the initializer reads the original
      // value through that variable, so the original has to be located first.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        // Null if the original variable is not captured by CS.
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          // Not captured: use the address of the variable directly.
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the original through its field in the shareds block and
          // re-wrap the pointer with the declared alignment of the variable.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          // Lambda-captured variable, or code emitted for a block: evaluate
          // the original reference as is.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  // Map the helper variable to the current source element.
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array: map the helper variable to the original's address and
          // emit the initializer into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // The initializer does not go through a helper variable.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3968 
3969 /// Check if duplication function is required for taskloops.
3970 static bool checkInitIsRequired(CodeGenFunction &CGF,
3971                                 ArrayRef<PrivateDataTy> Privates) {
3972   bool InitRequired = false;
3973   for (const PrivateDataTy &Pair : Privates) {
3974     if (Pair.second.isLocalPrivate())
3975       continue;
3976     const VarDecl *VD = Pair.second.PrivateCopy;
3977     const Expr *Init = VD->getAnyInitializer();
3978     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3979                                     !CGF.isTrivialInitializer(Init));
3980     if (InitRequired)
3981       break;
3982   }
3983   return InitRequired;
3984 }
3985 
3986 
3987 /// Emit task_dup function (for initialization of
3988 /// private/firstprivate/lastprivate vars and last_iter flag)
3989 /// \code
3990 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3991 /// lastpriv) {
3992 /// // setup lastprivate flag
3993 ///    task_dst->last = lastpriv;
3994 /// // could be constructor calls here...
3995 /// }
3996 /// \endcode
3997 static llvm::Value *
3998 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3999                     const OMPExecutableDirective &D,
4000                     QualType KmpTaskTWithPrivatesPtrQTy,
4001                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4002                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
4003                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4004                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4005   ASTContext &C = CGM.getContext();
4006   FunctionArgList Args;
4007   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4008                            KmpTaskTWithPrivatesPtrQTy,
4009                            ImplicitParamDecl::Other);
4010   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4011                            KmpTaskTWithPrivatesPtrQTy,
4012                            ImplicitParamDecl::Other);
4013   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4014                                 ImplicitParamDecl::Other);
4015   Args.push_back(&DstArg);
4016   Args.push_back(&SrcArg);
4017   Args.push_back(&LastprivArg);
4018   const auto &TaskDupFnInfo =
4019       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4020   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4021   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
4022   auto *TaskDup = llvm::Function::Create(
4023       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4024   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4025   TaskDup->setDoesNotRecurse();
4026   CodeGenFunction CGF(CGM);
4027   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4028                     Loc);
4029 
4030   LValue TDBase = CGF.EmitLoadOfPointerLValue(
4031       CGF.GetAddrOfLocalVar(&DstArg),
4032       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4033   // task_dst->liter = lastpriv;
4034   if (WithLastIter) {
4035     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4036     LValue Base = CGF.EmitLValueForField(
4037         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4038     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4039     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4040         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4041     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4042   }
4043 
4044   // Emit initial values for private copies (if any).
4045   assert(!Privates.empty());
4046   Address KmpTaskSharedsPtr = Address::invalid();
4047   if (!Data.FirstprivateVars.empty()) {
4048     LValue TDBase = CGF.EmitLoadOfPointerLValue(
4049         CGF.GetAddrOfLocalVar(&SrcArg),
4050         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4051     LValue Base = CGF.EmitLValueForField(
4052         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4053     KmpTaskSharedsPtr = Address(
4054         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4055                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
4056                                                   KmpTaskTShareds)),
4057                              Loc),
4058         CGM.getNaturalTypeAlignment(SharedsTy));
4059   }
4060   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4061                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4062   CGF.FinishFunction();
4063   return TaskDup;
4064 }
4065 
4066 /// Checks if destructor function is required to be generated.
4067 /// \return true if cleanups are required, false otherwise.
4068 static bool
4069 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4070                          ArrayRef<PrivateDataTy> Privates) {
4071   for (const PrivateDataTy &P : Privates) {
4072     if (P.second.isLocalPrivate())
4073       continue;
4074     QualType Ty = P.second.Original->getType().getNonReferenceType();
4075     if (Ty.isDestructedType())
4076       return true;
4077   }
4078   return false;
4079 }
4080 
4081 namespace {
4082 /// Loop generator for OpenMP iterator expression.
4083 class OMPIteratorGeneratorScope final
4084     : public CodeGenFunction::OMPPrivateScope {
4085   CodeGenFunction &CGF;
4086   const OMPIteratorExpr *E = nullptr;
4087   SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
4088   SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
4089   OMPIteratorGeneratorScope() = delete;
4090   OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
4091 
4092 public:
4093   OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
4094       : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
4095     if (!E)
4096       return;
4097     SmallVector<llvm::Value *, 4> Uppers;
4098     for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
4099       Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
4100       const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
4101       addPrivate(VD, [&CGF, VD]() {
4102         return CGF.CreateMemTemp(VD->getType(), VD->getName());
4103       });
4104       const OMPIteratorHelperData &HelperData = E->getHelper(I);
4105       addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
4106         return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
4107                                  "counter.addr");
4108       });
4109     }
4110     Privatize();
4111 
4112     for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
4113       const OMPIteratorHelperData &HelperData = E->getHelper(I);
4114       LValue CLVal =
4115           CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
4116                              HelperData.CounterVD->getType());
4117       // Counter = 0;
4118       CGF.EmitStoreOfScalar(
4119           llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
4120           CLVal);
4121       CodeGenFunction::JumpDest &ContDest =
4122           ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
4123       CodeGenFunction::JumpDest &ExitDest =
4124           ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
4125       // N = <number-of_iterations>;
4126       llvm::Value *N = Uppers[I];
4127       // cont:
4128       // if (Counter < N) goto body; else goto exit;
4129       CGF.EmitBlock(ContDest.getBlock());
4130       auto *CVal =
4131           CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
4132       llvm::Value *Cmp =
4133           HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
4134               ? CGF.Builder.CreateICmpSLT(CVal, N)
4135               : CGF.Builder.CreateICmpULT(CVal, N);
4136       llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
4137       CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
4138       // body:
4139       CGF.EmitBlock(BodyBB);
4140       // Iteri = Begini + Counter * Stepi;
4141       CGF.EmitIgnoredExpr(HelperData.Update);
4142     }
4143   }
4144   ~OMPIteratorGeneratorScope() {
4145     if (!E)
4146       return;
4147     for (unsigned I = E->numOfIterators(); I > 0; --I) {
4148       // Counter = Counter + 1;
4149       const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
4150       CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
4151       // goto cont;
4152       CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
4153       // exit:
4154       CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
4155     }
4156   }
4157 };
4158 } // namespace
4159 
4160 static std::pair<llvm::Value *, llvm::Value *>
4161 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4162   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4163   llvm::Value *Addr;
4164   if (OASE) {
4165     const Expr *Base = OASE->getBase();
4166     Addr = CGF.EmitScalarExpr(Base);
4167   } else {
4168     Addr = CGF.EmitLValue(E).getPointer(CGF);
4169   }
4170   llvm::Value *SizeVal;
4171   QualType Ty = E->getType();
4172   if (OASE) {
4173     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4174     for (const Expr *SE : OASE->getDimensions()) {
4175       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4176       Sz = CGF.EmitScalarConversion(
4177           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4178       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4179     }
4180   } else if (const auto *ASE =
4181                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4182     LValue UpAddrLVal =
4183         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4184     Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
4185     llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
4186         UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
4187     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4188     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4189     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4190   } else {
4191     SizeVal = CGF.getTypeSize(Ty);
4192   }
4193   return std::make_pair(Addr, SizeVal);
4194 }
4195 
4196 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4197 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4198   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4199   if (KmpTaskAffinityInfoTy.isNull()) {
4200     RecordDecl *KmpAffinityInfoRD =
4201         C.buildImplicitRecord("kmp_task_affinity_info_t");
4202     KmpAffinityInfoRD->startDefinition();
4203     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4204     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4205     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4206     KmpAffinityInfoRD->completeDefinition();
4207     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4208   }
4209 }
4210 
/// Emits allocation and initialization of the task descriptor for directive
/// \p D.
///
/// In order, this function:
///  1. collects all private copies described by \p Data and sorts them by
///     decreasing alignment;
///  2. selects (building and caching it if needed) the kmp_task_t record type
///     and builds the per-task record that appends the privates to it;
///  3. emits the privates mapping function (if there are privates) and the
///     proxy task entry function;
///  4. emits the call to __kmpc_omp_task_alloc, or to
///     __kmpc_omp_target_task_alloc if \p D has a 'nowait' clause;
///  5. handles 'detach' and 'affinity' clauses;
///  6. copies the shareds into the task, initializes the private copies and,
///     for taskloop directives that need it, emits the task duplication
///     function;
///  7. stores the destructors function and the priority into the task.
///
/// \param CGF Function in which the code is emitted.
/// \param Loc Location used for the emitted runtime calls.
/// \param D Directive for which the task is created.
/// \param TaskFunction Outlined task function; the type of its 4th argument
/// is used as the type of the privates mapping function pointer.
/// \param SharedsTy Type of the record with shared variables.
/// \param Shareds Address of the record with shared variables to copy from.
/// \param Data Privates, flags (tied/final/priority) and other task data.
/// \return Record with the allocated task (NewTask), the proxy entry
/// (TaskEntry), the task pointer cast to the per-task record type
/// (NewTaskNewTaskTTy), the lvalue of the embedded kmp_task_t (TDBase), the
/// kmp_task_t record declaration (KmpTaskTQTyRD) and, if it was emitted, the
/// task duplication function (TaskDupFn).
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  // Each entry pairs the alignment with the helper data of one private copy.
  // The *Vars and *Copies lists of Data are walked in lockstep.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Local privates: allocatable declarations are recorded with pointer
  // alignment, all other ones with the alignment of the declaration.
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Largest alignment first; the stable sort keeps the relative order of
  // privates with equal alignment.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop directives and all other directives use separately cached
  // record types.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  // The privates mapping function is passed to the task function as a value
  // of the type of the task function's 4th argument; it is a null pointer if
  // there are no privates.
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    // The second field of the per-task record holds the privates.
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  // Flags known at compile time are collected in 'Flags'; the final flag may
  // depend on a runtime condition and is merged in below.
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // If Data.Final carries a value, the final flag is selected at run time
  // based on it; otherwise the integer part of Data.Final decides statically.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // With a 'nowait' clause the target-task allocation entry point is used; it
  // takes the device ID as an additional trailing argument.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    // Note: this 'C' is the device clause and shadows the ASTContext 'C'.
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    // Note: this 'Loc' is the emitted location value of the detach clause and
    // shadows the SourceLocation parameter 'Loc' inside this block.
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    // The runtime result is converted from void * to the type of the event
    // handler expression before it is stored.
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // NumAffinities counts the list items of clauses without an iterator
    // modifier (known statically); NumOfElements is a runtime value derived
    // from the iterator upper bounds and stays null if no clause has an
    // iterator modifier.
    // NOTE(review): NumOfElements is the product of the upper bounds of every
    // iterator of every clause with a modifier and is not scaled by the
    // number of list items of those clauses, while the fill loop below writes
    // one entry per list item per iteration — confirm the array is large
    // enough when such a clause has several list items or when several
    // clauses have iterator modifiers.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    // Note: the enumerator 'Flags' shadows the local variable 'Flags' of the
    // enclosing function inside this block.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized case: total = static count + iterator-derived count.
      // The array is emitted as a variable-length array whose size expression
      // is an opaque value bound to the computed total.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      // The runtime call below takes the element count as a 32-bit integer.
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Statically sized case: a constant array temporary.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    // These occupy the first 'Pos' slots; only base_addr and len are written.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Entries produced inside iterator loops are indexed through a counter
    // kept in memory, which starts right after the statically filled slots.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      // The scope object emits the iterator loops; everything emitted until
      // it is destroyed at the end of this iteration is placed inside them.
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getElementType(),
                                          AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        // Advance the in-memory position counter.
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  // View the allocated task as the per-task record; its first field is the
  // kmp_task_t part.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  // The destination is the pointer loaded from the task's shareds field; it
  // stays invalid if the shareds record has no fields.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // A task duplication function is only emitted for taskloop directives
    // that have lastprivates or privates requiring initialization.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  // NOTE(review): the Data2 field is accessed through the union declaration
  // obtained from the Data1 field above; presumably both fields have the same
  // union type — confirm against createKmpTaskTRecordDecl.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4601 
namespace {
/// Dependence kinds in the encoding passed to the runtime library.
enum RTLDependenceKindTy {
  /// Value used for 'in' dependencies.
  DepIn = 0x1,
  /// Value used for both 'out' and 'inout' dependencies.
  DepInOut = 0x3,
  /// Value used for 'mutexinoutset' dependencies.
  DepMutexInOutSet = 0x4
};
/// Indices of the fields of the kmp_depend_info record, in declaration order.
enum RTLDependInfoFieldsTy { BaseAddr = 0, Len = 1, Flags = 2 };
} // namespace
4612 
4613 /// Translates internal dependency kind into the runtime kind.
4614 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4615   RTLDependenceKindTy DepKind;
4616   switch (K) {
4617   case OMPC_DEPEND_in:
4618     DepKind = DepIn;
4619     break;
4620   // Out and InOut dependencies must use the same code.
4621   case OMPC_DEPEND_out:
4622   case OMPC_DEPEND_inout:
4623     DepKind = DepInOut;
4624     break;
4625   case OMPC_DEPEND_mutexinoutset:
4626     DepKind = DepMutexInOutSet;
4627     break;
4628   case OMPC_DEPEND_source:
4629   case OMPC_DEPEND_sink:
4630   case OMPC_DEPEND_depobj:
4631   case OMPC_DEPEND_unknown:
4632     llvm_unreachable("Unknown task dependence type");
4633   }
4634   return DepKind;
4635 }
4636 
4637 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4638 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4639                            QualType &FlagsTy) {
4640   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4641   if (KmpDependInfoTy.isNull()) {
4642     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4643     KmpDependInfoRD->startDefinition();
4644     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4645     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4646     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4647     KmpDependInfoRD->completeDefinition();
4648     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4649   }
4650 }
4651 
4652 std::pair<llvm::Value *, LValue>
4653 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4654                                    SourceLocation Loc) {
4655   ASTContext &C = CGM.getContext();
4656   QualType FlagsTy;
4657   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4658   RecordDecl *KmpDependInfoRD =
4659       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4660   LValue Base = CGF.EmitLoadOfPointerLValue(
4661       DepobjLVal.getAddress(CGF),
4662       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4663   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4664   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4665           Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
4666   Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4667                             Base.getTBAAInfo());
4668   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4669       Addr.getElementType(), Addr.getPointer(),
4670       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4671   LValue NumDepsBase = CGF.MakeAddrLValue(
4672       Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4673       Base.getBaseInfo(), Base.getTBAAInfo());
4674   // NumDeps = deps[i].base_addr;
4675   LValue BaseAddrLVal = CGF.EmitLValueForField(
4676       NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4677   llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4678   return std::make_pair(NumDeps, Base);
4679 }
4680 
4681 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4682                            llvm::PointerUnion<unsigned *, LValue *> Pos,
4683                            const OMPTaskDataTy::DependData &Data,
4684                            Address DependenciesArray) {
4685   CodeGenModule &CGM = CGF.CGM;
4686   ASTContext &C = CGM.getContext();
4687   QualType FlagsTy;
4688   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4689   RecordDecl *KmpDependInfoRD =
4690       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4691   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4692 
4693   OMPIteratorGeneratorScope IteratorScope(
4694       CGF, cast_or_null<OMPIteratorExpr>(
4695                Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4696                                  : nullptr));
4697   for (const Expr *E : Data.DepExprs) {
4698     llvm::Value *Addr;
4699     llvm::Value *Size;
4700     std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4701     LValue Base;
4702     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4703       Base = CGF.MakeAddrLValue(
4704           CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4705     } else {
4706       LValue &PosLVal = *Pos.get<LValue *>();
4707       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4708       Base = CGF.MakeAddrLValue(
4709           Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
4710                                         DependenciesArray.getPointer(), Idx),
4711                   DependenciesArray.getAlignment()),
4712           KmpDependInfoTy);
4713     }
4714     // deps[i].base_addr = &<Dependencies[i].second>;
4715     LValue BaseAddrLVal = CGF.EmitLValueForField(
4716         Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4717     CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4718                           BaseAddrLVal);
4719     // deps[i].len = sizeof(<Dependencies[i].second>);
4720     LValue LenLVal = CGF.EmitLValueForField(
4721         Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4722     CGF.EmitStoreOfScalar(Size, LenLVal);
4723     // deps[i].flags = <Dependencies[i].first>;
4724     RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4725     LValue FlagsLVal = CGF.EmitLValueForField(
4726         Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4727     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4728                           FlagsLVal);
4729     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4730       ++(*P);
4731     } else {
4732       LValue &PosLVal = *Pos.get<LValue *>();
4733       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4734       Idx = CGF.Builder.CreateNUWAdd(Idx,
4735                                      llvm::ConstantInt::get(Idx->getType(), 1));
4736       CGF.EmitStoreOfScalar(Idx, PosLVal);
4737     }
4738   }
4739 }
4740 
4741 static SmallVector<llvm::Value *, 4>
4742 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4743                         const OMPTaskDataTy::DependData &Data) {
4744   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4745          "Expected depobj dependecy kind.");
4746   SmallVector<llvm::Value *, 4> Sizes;
4747   SmallVector<LValue, 4> SizeLVals;
4748   ASTContext &C = CGF.getContext();
4749   QualType FlagsTy;
4750   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4751   RecordDecl *KmpDependInfoRD =
4752       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4753   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4754   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4755   {
4756     OMPIteratorGeneratorScope IteratorScope(
4757         CGF, cast_or_null<OMPIteratorExpr>(
4758                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4759                                    : nullptr));
4760     for (const Expr *E : Data.DepExprs) {
4761       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4762       LValue Base = CGF.EmitLoadOfPointerLValue(
4763           DepobjLVal.getAddress(CGF),
4764           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4765       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4766           Base.getAddress(CGF), KmpDependInfoPtrT);
4767       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4768                                 Base.getTBAAInfo());
4769       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4770           Addr.getElementType(), Addr.getPointer(),
4771           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4772       LValue NumDepsBase = CGF.MakeAddrLValue(
4773           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4774           Base.getBaseInfo(), Base.getTBAAInfo());
4775       // NumDeps = deps[i].base_addr;
4776       LValue BaseAddrLVal = CGF.EmitLValueForField(
4777           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4778       llvm::Value *NumDeps =
4779           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4780       LValue NumLVal = CGF.MakeAddrLValue(
4781           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4782           C.getUIntPtrType());
4783       CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4784                               NumLVal.getAddress(CGF));
4785       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4786       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4787       CGF.EmitStoreOfScalar(Add, NumLVal);
4788       SizeLVals.push_back(NumLVal);
4789     }
4790   }
4791   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4792     llvm::Value *Size =
4793         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4794     Sizes.push_back(Size);
4795   }
4796   return Sizes;
4797 }
4798 
4799 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4800                                LValue PosLVal,
4801                                const OMPTaskDataTy::DependData &Data,
4802                                Address DependenciesArray) {
4803   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4804          "Expected depobj dependecy kind.");
4805   ASTContext &C = CGF.getContext();
4806   QualType FlagsTy;
4807   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4808   RecordDecl *KmpDependInfoRD =
4809       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4810   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4811   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4812   llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4813   {
4814     OMPIteratorGeneratorScope IteratorScope(
4815         CGF, cast_or_null<OMPIteratorExpr>(
4816                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4817                                    : nullptr));
4818     for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4819       const Expr *E = Data.DepExprs[I];
4820       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4821       LValue Base = CGF.EmitLoadOfPointerLValue(
4822           DepobjLVal.getAddress(CGF),
4823           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4824       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4825           Base.getAddress(CGF), KmpDependInfoPtrT);
4826       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4827                                 Base.getTBAAInfo());
4828 
4829       // Get number of elements in a single depobj.
4830       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4831           Addr.getElementType(), Addr.getPointer(),
4832           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4833       LValue NumDepsBase = CGF.MakeAddrLValue(
4834           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4835           Base.getBaseInfo(), Base.getTBAAInfo());
4836       // NumDeps = deps[i].base_addr;
4837       LValue BaseAddrLVal = CGF.EmitLValueForField(
4838           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4839       llvm::Value *NumDeps =
4840           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4841 
4842       // memcopy dependency data.
4843       llvm::Value *Size = CGF.Builder.CreateNUWMul(
4844           ElSize,
4845           CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4846       llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4847       Address DepAddr =
4848           Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
4849                                         DependenciesArray.getPointer(), Pos),
4850                   DependenciesArray.getAlignment());
4851       CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4852 
4853       // Increase pos.
4854       // pos += size;
4855       llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4856       CGF.EmitStoreOfScalar(Add, PosLVal);
4857     }
4858   }
4859 }
4860 
4861 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4862     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4863     SourceLocation Loc) {
4864   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4865         return D.DepExprs.empty();
4866       }))
4867     return std::make_pair(nullptr, Address::invalid());
4868   // Process list of dependencies.
4869   ASTContext &C = CGM.getContext();
4870   Address DependenciesArray = Address::invalid();
4871   llvm::Value *NumOfElements = nullptr;
4872   unsigned NumDependencies = std::accumulate(
4873       Dependencies.begin(), Dependencies.end(), 0,
4874       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4875         return D.DepKind == OMPC_DEPEND_depobj
4876                    ? V
4877                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4878       });
4879   QualType FlagsTy;
4880   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4881   bool HasDepobjDeps = false;
4882   bool HasRegularWithIterators = false;
4883   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4884   llvm::Value *NumOfRegularWithIterators =
4885       llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4886   // Calculate number of depobj dependecies and regular deps with the iterators.
4887   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4888     if (D.DepKind == OMPC_DEPEND_depobj) {
4889       SmallVector<llvm::Value *, 4> Sizes =
4890           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4891       for (llvm::Value *Size : Sizes) {
4892         NumOfDepobjElements =
4893             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4894       }
4895       HasDepobjDeps = true;
4896       continue;
4897     }
4898     // Include number of iterations, if any.
4899 
4900     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4901       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4902         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4903         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4904         llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4905             Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4906         NumOfRegularWithIterators =
4907             CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4908       }
4909       HasRegularWithIterators = true;
4910       continue;
4911     }
4912   }
4913 
4914   QualType KmpDependInfoArrayTy;
4915   if (HasDepobjDeps || HasRegularWithIterators) {
4916     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4917                                            /*isSigned=*/false);
4918     if (HasDepobjDeps) {
4919       NumOfElements =
4920           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4921     }
4922     if (HasRegularWithIterators) {
4923       NumOfElements =
4924           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4925     }
4926     auto *OVE = new (C) OpaqueValueExpr(
4927         Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4928         VK_PRValue);
4929     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4930                                                   RValue::get(NumOfElements));
4931     KmpDependInfoArrayTy =
4932         C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
4933                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4934     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4935     // Properly emit variable-sized array.
4936     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4937                                          ImplicitParamDecl::Other);
4938     CGF.EmitVarDecl(*PD);
4939     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4940     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4941                                               /*isSigned=*/false);
4942   } else {
4943     KmpDependInfoArrayTy = C.getConstantArrayType(
4944         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4945         ArrayType::Normal, /*IndexTypeQuals=*/0);
4946     DependenciesArray =
4947         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4948     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4949     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4950                                            /*isSigned=*/false);
4951   }
4952   unsigned Pos = 0;
4953   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4954     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4955         Dependencies[I].IteratorExpr)
4956       continue;
4957     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4958                    DependenciesArray);
4959   }
4960   // Copy regular dependecies with iterators.
4961   LValue PosLVal = CGF.MakeAddrLValue(
4962       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4963   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4964   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4965     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4966         !Dependencies[I].IteratorExpr)
4967       continue;
4968     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4969                    DependenciesArray);
4970   }
4971   // Copy final depobj arrays without iterators.
4972   if (HasDepobjDeps) {
4973     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4974       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4975         continue;
4976       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4977                          DependenciesArray);
4978     }
4979   }
4980   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4981       DependenciesArray, CGF.VoidPtrTy);
4982   return std::make_pair(NumOfElements, DependenciesArray);
4983 }
4984 
4985 Address CGOpenMPRuntime::emitDepobjDependClause(
4986     CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4987     SourceLocation Loc) {
4988   if (Dependencies.DepExprs.empty())
4989     return Address::invalid();
4990   // Process list of dependencies.
4991   ASTContext &C = CGM.getContext();
4992   Address DependenciesArray = Address::invalid();
4993   unsigned NumDependencies = Dependencies.DepExprs.size();
4994   QualType FlagsTy;
4995   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4996   RecordDecl *KmpDependInfoRD =
4997       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4998 
4999   llvm::Value *Size;
5000   // Define type kmp_depend_info[<Dependencies.size()>];
5001   // For depobj reserve one extra element to store the number of elements.
5002   // It is required to handle depobj(x) update(in) construct.
5003   // kmp_depend_info[<Dependencies.size()>] deps;
5004   llvm::Value *NumDepsVal;
5005   CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
5006   if (const auto *IE =
5007           cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
5008     NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
5009     for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
5010       llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
5011       Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
5012       NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
5013     }
5014     Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
5015                                     NumDepsVal);
5016     CharUnits SizeInBytes =
5017         C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
5018     llvm::Value *RecSize = CGM.getSize(SizeInBytes);
5019     Size = CGF.Builder.CreateNUWMul(Size, RecSize);
5020     NumDepsVal =
5021         CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
5022   } else {
5023     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
5024         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
5025         nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
5026     CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
5027     Size = CGM.getSize(Sz.alignTo(Align));
5028     NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
5029   }
5030   // Need to allocate on the dynamic memory.
5031   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5032   // Use default allocator.
5033   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5034   llvm::Value *Args[] = {ThreadID, Size, Allocator};
5035 
5036   llvm::Value *Addr =
5037       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5038                               CGM.getModule(), OMPRTL___kmpc_alloc),
5039                           Args, ".dep.arr.addr");
5040   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5041       Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
5042   DependenciesArray = Address(Addr, Align);
5043   // Write number of elements in the first element of array for depobj.
5044   LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
5045   // deps[i].base_addr = NumDependencies;
5046   LValue BaseAddrLVal = CGF.EmitLValueForField(
5047       Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5048   CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
5049   llvm::PointerUnion<unsigned *, LValue *> Pos;
5050   unsigned Idx = 1;
5051   LValue PosLVal;
5052   if (Dependencies.IteratorExpr) {
5053     PosLVal = CGF.MakeAddrLValue(
5054         CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
5055         C.getSizeType());
5056     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
5057                           /*IsInit=*/true);
5058     Pos = &PosLVal;
5059   } else {
5060     Pos = &Idx;
5061   }
5062   emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
5063   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5064       CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
5065   return DependenciesArray;
5066 }
5067 
5068 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5069                                         SourceLocation Loc) {
5070   ASTContext &C = CGM.getContext();
5071   QualType FlagsTy;
5072   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5073   LValue Base = CGF.EmitLoadOfPointerLValue(
5074       DepobjLVal.getAddress(CGF),
5075       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5076   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5077   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5078       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5079   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5080       Addr.getElementType(), Addr.getPointer(),
5081       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5082   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5083                                                                CGF.VoidPtrTy);
5084   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5085   // Use default allocator.
5086   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5087   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5088 
5089   // _kmpc_free(gtid, addr, nullptr);
5090   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5091                                 CGM.getModule(), OMPRTL___kmpc_free),
5092                             Args);
5093 }
5094 
5095 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
5096                                        OpenMPDependClauseKind NewDepKind,
5097                                        SourceLocation Loc) {
5098   ASTContext &C = CGM.getContext();
5099   QualType FlagsTy;
5100   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5101   RecordDecl *KmpDependInfoRD =
5102       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5103   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5104   llvm::Value *NumDeps;
5105   LValue Base;
5106   std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
5107 
5108   Address Begin = Base.getAddress(CGF);
5109   // Cast from pointer to array type to pointer to single element.
5110   llvm::Value *End = CGF.Builder.CreateGEP(
5111       Begin.getElementType(), Begin.getPointer(), NumDeps);
5112   // The basic structure here is a while-do loop.
5113   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
5114   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
5115   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5116   CGF.EmitBlock(BodyBB);
5117   llvm::PHINode *ElementPHI =
5118       CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
5119   ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
5120   Begin = Address(ElementPHI, Begin.getAlignment());
5121   Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
5122                             Base.getTBAAInfo());
5123   // deps[i].flags = NewDepKind;
5124   RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
5125   LValue FlagsLVal = CGF.EmitLValueForField(
5126       Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5127   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5128                         FlagsLVal);
5129 
5130   // Shift the address forward by one element.
5131   Address ElementNext =
5132       CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
5133   ElementPHI->addIncoming(ElementNext.getPointer(),
5134                           CGF.Builder.GetInsertBlock());
5135   llvm::Value *IsEmpty =
5136       CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
5137   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5138   // Done.
5139   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5140 }
5141 
5142 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5143                                    const OMPExecutableDirective &D,
5144                                    llvm::Function *TaskFunction,
5145                                    QualType SharedsTy, Address Shareds,
5146                                    const Expr *IfCond,
5147                                    const OMPTaskDataTy &Data) {
5148   if (!CGF.HaveInsertPoint())
5149     return;
5150 
5151   TaskResultTy Result =
5152       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5153   llvm::Value *NewTask = Result.NewTask;
5154   llvm::Function *TaskEntry = Result.TaskEntry;
5155   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5156   LValue TDBase = Result.TDBase;
5157   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5158   // Process list of dependences.
5159   Address DependenciesArray = Address::invalid();
5160   llvm::Value *NumOfElements;
5161   std::tie(NumOfElements, DependenciesArray) =
5162       emitDependClause(CGF, Data.Dependences, Loc);
5163 
5164   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5165   // libcall.
5166   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5167   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5168   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5169   // list is not empty
5170   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5171   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5172   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5173   llvm::Value *DepTaskArgs[7];
5174   if (!Data.Dependences.empty()) {
5175     DepTaskArgs[0] = UpLoc;
5176     DepTaskArgs[1] = ThreadID;
5177     DepTaskArgs[2] = NewTask;
5178     DepTaskArgs[3] = NumOfElements;
5179     DepTaskArgs[4] = DependenciesArray.getPointer();
5180     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5181     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5182   }
5183   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
5184                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5185     if (!Data.Tied) {
5186       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5187       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5188       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5189     }
5190     if (!Data.Dependences.empty()) {
5191       CGF.EmitRuntimeCall(
5192           OMPBuilder.getOrCreateRuntimeFunction(
5193               CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
5194           DepTaskArgs);
5195     } else {
5196       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5197                               CGM.getModule(), OMPRTL___kmpc_omp_task),
5198                           TaskArgs);
5199     }
5200     // Check if parent region is untied and build return for untied task;
5201     if (auto *Region =
5202             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5203       Region->emitUntiedSwitch(CGF);
5204   };
5205 
5206   llvm::Value *DepWaitTaskArgs[6];
5207   if (!Data.Dependences.empty()) {
5208     DepWaitTaskArgs[0] = UpLoc;
5209     DepWaitTaskArgs[1] = ThreadID;
5210     DepWaitTaskArgs[2] = NumOfElements;
5211     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5212     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5213     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5214   }
5215   auto &M = CGM.getModule();
5216   auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
5217                         TaskEntry, &Data, &DepWaitTaskArgs,
5218                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5219     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5220     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5221     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5222     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5223     // is specified.
5224     if (!Data.Dependences.empty())
5225       CGF.EmitRuntimeCall(
5226           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
5227           DepWaitTaskArgs);
5228     // Call proxy_task_entry(gtid, new_task);
5229     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5230                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5231       Action.Enter(CGF);
5232       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5233       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5234                                                           OutlinedFnArgs);
5235     };
5236 
5237     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5238     // kmp_task_t *new_task);
5239     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5240     // kmp_task_t *new_task);
5241     RegionCodeGenTy RCG(CodeGen);
5242     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
5243                               M, OMPRTL___kmpc_omp_task_begin_if0),
5244                           TaskArgs,
5245                           OMPBuilder.getOrCreateRuntimeFunction(
5246                               M, OMPRTL___kmpc_omp_task_complete_if0),
5247                           TaskArgs);
5248     RCG.setAction(Action);
5249     RCG(CGF);
5250   };
5251 
5252   if (IfCond) {
5253     emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5254   } else {
5255     RegionCodeGenTy ThenRCG(ThenCodeGen);
5256     ThenRCG(CGF);
5257   }
5258 }
5259 
5260 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5261                                        const OMPLoopDirective &D,
5262                                        llvm::Function *TaskFunction,
5263                                        QualType SharedsTy, Address Shareds,
5264                                        const Expr *IfCond,
5265                                        const OMPTaskDataTy &Data) {
5266   if (!CGF.HaveInsertPoint())
5267     return;
5268   TaskResultTy Result =
5269       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5270   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5271   // libcall.
5272   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5273   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5274   // sched, kmp_uint64 grainsize, void *task_dup);
5275   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5276   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5277   llvm::Value *IfVal;
5278   if (IfCond) {
5279     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5280                                       /*isSigned=*/true);
5281   } else {
5282     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5283   }
5284 
5285   LValue LBLVal = CGF.EmitLValueForField(
5286       Result.TDBase,
5287       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5288   const auto *LBVar =
5289       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5290   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5291                        LBLVal.getQuals(),
5292                        /*IsInitializer=*/true);
5293   LValue UBLVal = CGF.EmitLValueForField(
5294       Result.TDBase,
5295       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5296   const auto *UBVar =
5297       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5298   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5299                        UBLVal.getQuals(),
5300                        /*IsInitializer=*/true);
5301   LValue StLVal = CGF.EmitLValueForField(
5302       Result.TDBase,
5303       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5304   const auto *StVar =
5305       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5306   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5307                        StLVal.getQuals(),
5308                        /*IsInitializer=*/true);
5309   // Store reductions address.
5310   LValue RedLVal = CGF.EmitLValueForField(
5311       Result.TDBase,
5312       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5313   if (Data.Reductions) {
5314     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5315   } else {
5316     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5317                                CGF.getContext().VoidPtrTy);
5318   }
5319   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5320   llvm::Value *TaskArgs[] = {
5321       UpLoc,
5322       ThreadID,
5323       Result.NewTask,
5324       IfVal,
5325       LBLVal.getPointer(CGF),
5326       UBLVal.getPointer(CGF),
5327       CGF.EmitLoadOfScalar(StLVal, Loc),
5328       llvm::ConstantInt::getSigned(
5329           CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5330       llvm::ConstantInt::getSigned(
5331           CGF.IntTy, Data.Schedule.getPointer()
5332                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5333                          : NoSchedule),
5334       Data.Schedule.getPointer()
5335           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5336                                       /*isSigned=*/false)
5337           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5338       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5339                              Result.TaskDupFn, CGF.VoidPtrTy)
5340                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5341   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5342                           CGM.getModule(), OMPRTL___kmpc_taskloop),
5343                       TaskArgs);
5344 }
5345 
5346 /// Emit reduction operation for each element of array (required for
5347 /// array sections) LHS op = RHS.
5348 /// \param Type Type of array.
5349 /// \param LHSVar Variable on the left side of the reduction operation
5350 /// (references element of array in original variable).
5351 /// \param RHSVar Variable on the right side of the reduction operation
5352 /// (references element of array in original variable).
5353 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5354 /// RHSVar.
5355 static void EmitOMPAggregateReduction(
5356     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5357     const VarDecl *RHSVar,
5358     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5359                                   const Expr *, const Expr *)> &RedOpGen,
5360     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5361     const Expr *UpExpr = nullptr) {
5362   // Perform element-by-element initialization.
5363   QualType ElementTy;
5364   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5365   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5366 
5367   // Drill down to the base element type on both arrays.
5368   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5369   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5370 
5371   llvm::Value *RHSBegin = RHSAddr.getPointer();
5372   llvm::Value *LHSBegin = LHSAddr.getPointer();
5373   // Cast from pointer to array type to pointer to single element.
5374   llvm::Value *LHSEnd =
5375       CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
5376   // The basic structure here is a while-do loop.
5377   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5378   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5379   llvm::Value *IsEmpty =
5380       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5381   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5382 
5383   // Enter the loop body, making that address the current address.
5384   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5385   CGF.EmitBlock(BodyBB);
5386 
5387   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5388 
5389   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5390       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5391   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5392   Address RHSElementCurrent =
5393       Address(RHSElementPHI,
5394               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5395 
5396   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5397       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5398   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5399   Address LHSElementCurrent =
5400       Address(LHSElementPHI,
5401               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5402 
5403   // Emit copy.
5404   CodeGenFunction::OMPPrivateScope Scope(CGF);
5405   Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5406   Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5407   Scope.Privatize();
5408   RedOpGen(CGF, XExpr, EExpr, UpExpr);
5409   Scope.ForceCleanup();
5410 
5411   // Shift the address forward by one element.
5412   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5413       LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
5414       "omp.arraycpy.dest.element");
5415   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5416       RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
5417       "omp.arraycpy.src.element");
5418   // Check whether we've reached the end.
5419   llvm::Value *Done =
5420       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5421   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5422   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5423   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5424 
5425   // Done.
5426   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5427 }
5428 
5429 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5430 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5431 /// UDR combiner function.
5432 static void emitReductionCombiner(CodeGenFunction &CGF,
5433                                   const Expr *ReductionOp) {
5434   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5435     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5436       if (const auto *DRE =
5437               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5438         if (const auto *DRD =
5439                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5440           std::pair<llvm::Function *, llvm::Function *> Reduction =
5441               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5442           RValue Func = RValue::get(Reduction.first);
5443           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5444           CGF.EmitIgnoredExpr(ReductionOp);
5445           return;
5446         }
5447   CGF.EmitIgnoredExpr(ReductionOp);
5448 }
5449 
5450 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5451     SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5452     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5453     ArrayRef<const Expr *> ReductionOps) {
5454   ASTContext &C = CGM.getContext();
5455 
5456   // void reduction_func(void *LHSArg, void *RHSArg);
5457   FunctionArgList Args;
5458   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5459                            ImplicitParamDecl::Other);
5460   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5461                            ImplicitParamDecl::Other);
5462   Args.push_back(&LHSArg);
5463   Args.push_back(&RHSArg);
5464   const auto &CGFI =
5465       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5466   std::string Name = getName({"omp", "reduction", "reduction_func"});
5467   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5468                                     llvm::GlobalValue::InternalLinkage, Name,
5469                                     &CGM.getModule());
5470   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5471   Fn->setDoesNotRecurse();
5472   CodeGenFunction CGF(CGM);
5473   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5474 
5475   // Dst = (void*[n])(LHSArg);
5476   // Src = (void*[n])(RHSArg);
5477   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5478       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5479       ArgsType), CGF.getPointerAlign());
5480   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5481       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5482       ArgsType), CGF.getPointerAlign());
5483 
5484   //  ...
5485   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5486   //  ...
5487   CodeGenFunction::OMPPrivateScope Scope(CGF);
5488   auto IPriv = Privates.begin();
5489   unsigned Idx = 0;
5490   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5491     const auto *RHSVar =
5492         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5493     Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5494       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5495     });
5496     const auto *LHSVar =
5497         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5498     Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5499       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5500     });
5501     QualType PrivTy = (*IPriv)->getType();
5502     if (PrivTy->isVariablyModifiedType()) {
5503       // Get array size and emit VLA type.
5504       ++Idx;
5505       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5506       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5507       const VariableArrayType *VLA =
5508           CGF.getContext().getAsVariableArrayType(PrivTy);
5509       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5510       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5511           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5512       CGF.EmitVariablyModifiedType(PrivTy);
5513     }
5514   }
5515   Scope.Privatize();
5516   IPriv = Privates.begin();
5517   auto ILHS = LHSExprs.begin();
5518   auto IRHS = RHSExprs.begin();
5519   for (const Expr *E : ReductionOps) {
5520     if ((*IPriv)->getType()->isArrayType()) {
5521       // Emit reduction for array section.
5522       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5523       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5524       EmitOMPAggregateReduction(
5525           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5526           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5527             emitReductionCombiner(CGF, E);
5528           });
5529     } else {
5530       // Emit reduction for array subscript or single variable.
5531       emitReductionCombiner(CGF, E);
5532     }
5533     ++IPriv;
5534     ++ILHS;
5535     ++IRHS;
5536   }
5537   Scope.ForceCleanup();
5538   CGF.FinishFunction();
5539   return Fn;
5540 }
5541 
5542 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5543                                                   const Expr *ReductionOp,
5544                                                   const Expr *PrivateRef,
5545                                                   const DeclRefExpr *LHS,
5546                                                   const DeclRefExpr *RHS) {
5547   if (PrivateRef->getType()->isArrayType()) {
5548     // Emit reduction for array section.
5549     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5550     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5551     EmitOMPAggregateReduction(
5552         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5553         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5554           emitReductionCombiner(CGF, ReductionOp);
5555         });
5556   } else {
5557     // Emit reduction for array subscript or single variable.
5558     emitReductionCombiner(CGF, ReductionOp);
5559   }
5560 }
5561 
5562 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5563                                     ArrayRef<const Expr *> Privates,
5564                                     ArrayRef<const Expr *> LHSExprs,
5565                                     ArrayRef<const Expr *> RHSExprs,
5566                                     ArrayRef<const Expr *> ReductionOps,
5567                                     ReductionOptionsTy Options) {
5568   if (!CGF.HaveInsertPoint())
5569     return;
5570 
5571   bool WithNowait = Options.WithNowait;
5572   bool SimpleReduction = Options.SimpleReduction;
5573 
5574   // Next code should be emitted for reduction:
5575   //
5576   // static kmp_critical_name lock = { 0 };
5577   //
5578   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5579   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5580   //  ...
5581   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5582   //  *(Type<n>-1*)rhs[<n>-1]);
5583   // }
5584   //
5585   // ...
5586   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5587   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5588   // RedList, reduce_func, &<lock>)) {
5589   // case 1:
5590   //  ...
5591   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5592   //  ...
5593   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5594   // break;
5595   // case 2:
5596   //  ...
5597   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5598   //  ...
5599   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5600   // break;
5601   // default:;
5602   // }
5603   //
5604   // if SimpleReduction is true, only the next code is generated:
5605   //  ...
5606   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5607   //  ...
5608 
5609   ASTContext &C = CGM.getContext();
5610 
5611   if (SimpleReduction) {
5612     CodeGenFunction::RunCleanupsScope Scope(CGF);
5613     auto IPriv = Privates.begin();
5614     auto ILHS = LHSExprs.begin();
5615     auto IRHS = RHSExprs.begin();
5616     for (const Expr *E : ReductionOps) {
5617       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5618                                   cast<DeclRefExpr>(*IRHS));
5619       ++IPriv;
5620       ++ILHS;
5621       ++IRHS;
5622     }
5623     return;
5624   }
5625 
5626   // 1. Build a list of reduction variables.
5627   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5628   auto Size = RHSExprs.size();
5629   for (const Expr *E : Privates) {
5630     if (E->getType()->isVariablyModifiedType())
5631       // Reserve place for array size.
5632       ++Size;
5633   }
5634   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5635   QualType ReductionArrayTy =
5636       C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
5637                              /*IndexTypeQuals=*/0);
5638   Address ReductionList =
5639       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5640   auto IPriv = Privates.begin();
5641   unsigned Idx = 0;
5642   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5643     Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5644     CGF.Builder.CreateStore(
5645         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5646             CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5647         Elem);
5648     if ((*IPriv)->getType()->isVariablyModifiedType()) {
5649       // Store array size.
5650       ++Idx;
5651       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5652       llvm::Value *Size = CGF.Builder.CreateIntCast(
5653           CGF.getVLASize(
5654                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5655               .NumElts,
5656           CGF.SizeTy, /*isSigned=*/false);
5657       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5658                               Elem);
5659     }
5660   }
5661 
5662   // 2. Emit reduce_func().
5663   llvm::Function *ReductionFn = emitReductionFunction(
5664       Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
5665       LHSExprs, RHSExprs, ReductionOps);
5666 
5667   // 3. Create static kmp_critical_name lock = { 0 };
5668   std::string Name = getName({"reduction"});
5669   llvm::Value *Lock = getCriticalRegionLock(Name);
5670 
5671   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5672   // RedList, reduce_func, &<lock>);
5673   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5674   llvm::Value *ThreadId = getThreadID(CGF, Loc);
5675   llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5676   llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5677       ReductionList.getPointer(), CGF.VoidPtrTy);
5678   llvm::Value *Args[] = {
5679       IdentTLoc,                             // ident_t *<loc>
5680       ThreadId,                              // i32 <gtid>
5681       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5682       ReductionArrayTySize,                  // size_type sizeof(RedList)
5683       RL,                                    // void *RedList
5684       ReductionFn, // void (*) (void *, void *) <reduce_func>
5685       Lock         // kmp_critical_name *&<lock>
5686   };
5687   llvm::Value *Res = CGF.EmitRuntimeCall(
5688       OMPBuilder.getOrCreateRuntimeFunction(
5689           CGM.getModule(),
5690           WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5691       Args);
5692 
5693   // 5. Build switch(res)
5694   llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5695   llvm::SwitchInst *SwInst =
5696       CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5697 
5698   // 6. Build case 1:
5699   //  ...
5700   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5701   //  ...
5702   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5703   // break;
5704   llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5705   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5706   CGF.EmitBlock(Case1BB);
5707 
5708   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5709   llvm::Value *EndArgs[] = {
5710       IdentTLoc, // ident_t *<loc>
5711       ThreadId,  // i32 <gtid>
5712       Lock       // kmp_critical_name *&<lock>
5713   };
5714   auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5715                        CodeGenFunction &CGF, PrePostActionTy &Action) {
5716     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5717     auto IPriv = Privates.begin();
5718     auto ILHS = LHSExprs.begin();
5719     auto IRHS = RHSExprs.begin();
5720     for (const Expr *E : ReductionOps) {
5721       RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5722                                      cast<DeclRefExpr>(*IRHS));
5723       ++IPriv;
5724       ++ILHS;
5725       ++IRHS;
5726     }
5727   };
5728   RegionCodeGenTy RCG(CodeGen);
5729   CommonActionTy Action(
5730       nullptr, llvm::None,
5731       OMPBuilder.getOrCreateRuntimeFunction(
5732           CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5733                                       : OMPRTL___kmpc_end_reduce),
5734       EndArgs);
5735   RCG.setAction(Action);
5736   RCG(CGF);
5737 
5738   CGF.EmitBranch(DefaultBB);
5739 
5740   // 7. Build case 2:
5741   //  ...
5742   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5743   //  ...
5744   // break;
5745   llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5746   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5747   CGF.EmitBlock(Case2BB);
5748 
5749   auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5750                              CodeGenFunction &CGF, PrePostActionTy &Action) {
5751     auto ILHS = LHSExprs.begin();
5752     auto IRHS = RHSExprs.begin();
5753     auto IPriv = Privates.begin();
5754     for (const Expr *E : ReductionOps) {
5755       const Expr *XExpr = nullptr;
5756       const Expr *EExpr = nullptr;
5757       const Expr *UpExpr = nullptr;
5758       BinaryOperatorKind BO = BO_Comma;
5759       if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5760         if (BO->getOpcode() == BO_Assign) {
5761           XExpr = BO->getLHS();
5762           UpExpr = BO->getRHS();
5763         }
5764       }
5765       // Try to emit update expression as a simple atomic.
5766       const Expr *RHSExpr = UpExpr;
5767       if (RHSExpr) {
5768         // Analyze RHS part of the whole expression.
5769         if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5770                 RHSExpr->IgnoreParenImpCasts())) {
5771           // If this is a conditional operator, analyze its condition for
5772           // min/max reduction operator.
5773           RHSExpr = ACO->getCond();
5774         }
5775         if (const auto *BORHS =
5776                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5777           EExpr = BORHS->getRHS();
5778           BO = BORHS->getOpcode();
5779         }
5780       }
5781       if (XExpr) {
5782         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5783         auto &&AtomicRedGen = [BO, VD,
5784                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
5785                                     const Expr *EExpr, const Expr *UpExpr) {
5786           LValue X = CGF.EmitLValue(XExpr);
5787           RValue E;
5788           if (EExpr)
5789             E = CGF.EmitAnyExpr(EExpr);
5790           CGF.EmitOMPAtomicSimpleUpdateExpr(
5791               X, E, BO, /*IsXLHSInRHSPart=*/true,
5792               llvm::AtomicOrdering::Monotonic, Loc,
5793               [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5794                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5795                 PrivateScope.addPrivate(
5796                     VD, [&CGF, VD, XRValue, Loc]() {
5797                       Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5798                       CGF.emitOMPSimpleStore(
5799                           CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5800                           VD->getType().getNonReferenceType(), Loc);
5801                       return LHSTemp;
5802                     });
5803                 (void)PrivateScope.Privatize();
5804                 return CGF.EmitAnyExpr(UpExpr);
5805               });
5806         };
5807         if ((*IPriv)->getType()->isArrayType()) {
5808           // Emit atomic reduction for array section.
5809           const auto *RHSVar =
5810               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5811           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5812                                     AtomicRedGen, XExpr, EExpr, UpExpr);
5813         } else {
5814           // Emit atomic reduction for array subscript or single variable.
5815           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5816         }
5817       } else {
5818         // Emit as a critical region.
5819         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5820                                            const Expr *, const Expr *) {
5821           CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5822           std::string Name = RT.getName({"atomic_reduction"});
5823           RT.emitCriticalRegion(
5824               CGF, Name,
5825               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5826                 Action.Enter(CGF);
5827                 emitReductionCombiner(CGF, E);
5828               },
5829               Loc);
5830         };
5831         if ((*IPriv)->getType()->isArrayType()) {
5832           const auto *LHSVar =
5833               cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5834           const auto *RHSVar =
5835               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5836           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5837                                     CritRedGen);
5838         } else {
5839           CritRedGen(CGF, nullptr, nullptr, nullptr);
5840         }
5841       }
5842       ++ILHS;
5843       ++IRHS;
5844       ++IPriv;
5845     }
5846   };
5847   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5848   if (!WithNowait) {
5849     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5850     llvm::Value *EndArgs[] = {
5851         IdentTLoc, // ident_t *<loc>
5852         ThreadId,  // i32 <gtid>
5853         Lock       // kmp_critical_name *&<lock>
5854     };
5855     CommonActionTy Action(nullptr, llvm::None,
5856                           OMPBuilder.getOrCreateRuntimeFunction(
5857                               CGM.getModule(), OMPRTL___kmpc_end_reduce),
5858                           EndArgs);
5859     AtomicRCG.setAction(Action);
5860     AtomicRCG(CGF);
5861   } else {
5862     AtomicRCG(CGF);
5863   }
5864 
5865   CGF.EmitBranch(DefaultBB);
5866   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5867 }
5868 
5869 /// Generates unique name for artificial threadprivate variables.
5870 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5871 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5872                                       const Expr *Ref) {
5873   SmallString<256> Buffer;
5874   llvm::raw_svector_ostream Out(Buffer);
5875   const clang::DeclRefExpr *DE;
5876   const VarDecl *D = ::getBaseDecl(Ref, DE);
5877   if (!D)
5878     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5879   D = D->getCanonicalDecl();
5880   std::string Name = CGM.getOpenMPRuntime().getName(
5881       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5882   Out << Prefix << Name << "_"
5883       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5884   return std::string(Out.str());
5885 }
5886 
5887 /// Emits reduction initializer function:
5888 /// \code
5889 /// void @.red_init(void* %arg, void* %orig) {
5890 /// %0 = bitcast void* %arg to <type>*
5891 /// store <type> <init>, <type>* %0
5892 /// ret void
5893 /// }
5894 /// \endcode
5895 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5896                                            SourceLocation Loc,
5897                                            ReductionCodeGen &RCG, unsigned N) {
5898   ASTContext &C = CGM.getContext();
5899   QualType VoidPtrTy = C.VoidPtrTy;
5900   VoidPtrTy.addRestrict();
5901   FunctionArgList Args;
5902   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5903                           ImplicitParamDecl::Other);
5904   ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5905                               ImplicitParamDecl::Other);
5906   Args.emplace_back(&Param);
5907   Args.emplace_back(&ParamOrig);
5908   const auto &FnInfo =
5909       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5910   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5911   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5912   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5913                                     Name, &CGM.getModule());
5914   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5915   Fn->setDoesNotRecurse();
5916   CodeGenFunction CGF(CGM);
5917   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5918   Address PrivateAddr = CGF.EmitLoadOfPointer(
5919       CGF.GetAddrOfLocalVar(&Param),
5920       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5921   llvm::Value *Size = nullptr;
5922   // If the size of the reduction item is non-constant, load it from global
5923   // threadprivate variable.
5924   if (RCG.getSizes(N).second) {
5925     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5926         CGF, CGM.getContext().getSizeType(),
5927         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5928     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5929                                 CGM.getContext().getSizeType(), Loc);
5930   }
5931   RCG.emitAggregateType(CGF, N, Size);
5932   LValue OrigLVal;
5933   // If initializer uses initializer from declare reduction construct, emit a
5934   // pointer to the address of the original reduction item (reuired by reduction
5935   // initializer)
5936   if (RCG.usesReductionInitializer(N)) {
5937     Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5938     SharedAddr = CGF.EmitLoadOfPointer(
5939         SharedAddr,
5940         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5941     OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
5942   } else {
5943     OrigLVal = CGF.MakeNaturalAlignAddrLValue(
5944         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
5945         CGM.getContext().VoidPtrTy);
5946   }
5947   // Emit the initializer:
5948   // %0 = bitcast void* %arg to <type>*
5949   // store <type> <init>, <type>* %0
5950   RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
5951                          [](CodeGenFunction &) { return false; });
5952   CGF.FinishFunction();
5953   return Fn;
5954 }
5955 
5956 /// Emits reduction combiner function:
5957 /// \code
5958 /// void @.red_comb(void* %arg0, void* %arg1) {
5959 /// %lhs = bitcast void* %arg0 to <type>*
5960 /// %rhs = bitcast void* %arg1 to <type>*
5961 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5962 /// store <type> %2, <type>* %lhs
5963 /// ret void
5964 /// }
5965 /// \endcode
5966 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5967                                            SourceLocation Loc,
5968                                            ReductionCodeGen &RCG, unsigned N,
5969                                            const Expr *ReductionOp,
5970                                            const Expr *LHS, const Expr *RHS,
5971                                            const Expr *PrivateRef) {
5972   ASTContext &C = CGM.getContext();
5973   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5974   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5975   FunctionArgList Args;
5976   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5977                                C.VoidPtrTy, ImplicitParamDecl::Other);
5978   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5979                             ImplicitParamDecl::Other);
5980   Args.emplace_back(&ParamInOut);
5981   Args.emplace_back(&ParamIn);
5982   const auto &FnInfo =
5983       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5984   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5985   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5986   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5987                                     Name, &CGM.getModule());
5988   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5989   Fn->setDoesNotRecurse();
5990   CodeGenFunction CGF(CGM);
5991   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5992   llvm::Value *Size = nullptr;
5993   // If the size of the reduction item is non-constant, load it from global
5994   // threadprivate variable.
5995   if (RCG.getSizes(N).second) {
5996     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5997         CGF, CGM.getContext().getSizeType(),
5998         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5999     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6000                                 CGM.getContext().getSizeType(), Loc);
6001   }
6002   RCG.emitAggregateType(CGF, N, Size);
6003   // Remap lhs and rhs variables to the addresses of the function arguments.
6004   // %lhs = bitcast void* %arg0 to <type>*
6005   // %rhs = bitcast void* %arg1 to <type>*
6006   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6007   PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
6008     // Pull out the pointer to the variable.
6009     Address PtrAddr = CGF.EmitLoadOfPointer(
6010         CGF.GetAddrOfLocalVar(&ParamInOut),
6011         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6012     return CGF.Builder.CreateElementBitCast(
6013         PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
6014   });
6015   PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
6016     // Pull out the pointer to the variable.
6017     Address PtrAddr = CGF.EmitLoadOfPointer(
6018         CGF.GetAddrOfLocalVar(&ParamIn),
6019         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6020     return CGF.Builder.CreateElementBitCast(
6021         PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
6022   });
6023   PrivateScope.Privatize();
6024   // Emit the combiner body:
6025   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
6026   // store <type> %2, <type>* %lhs
6027   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
6028       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
6029       cast<DeclRefExpr>(RHS));
6030   CGF.FinishFunction();
6031   return Fn;
6032 }
6033 
6034 /// Emits reduction finalizer function:
6035 /// \code
6036 /// void @.red_fini(void* %arg) {
6037 /// %0 = bitcast void* %arg to <type>*
6038 /// <destroy>(<type>* %0)
6039 /// ret void
6040 /// }
6041 /// \endcode
6042 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
6043                                            SourceLocation Loc,
6044                                            ReductionCodeGen &RCG, unsigned N) {
6045   if (!RCG.needCleanups(N))
6046     return nullptr;
6047   ASTContext &C = CGM.getContext();
6048   FunctionArgList Args;
6049   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6050                           ImplicitParamDecl::Other);
6051   Args.emplace_back(&Param);
6052   const auto &FnInfo =
6053       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6054   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6055   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
6056   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6057                                     Name, &CGM.getModule());
6058   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6059   Fn->setDoesNotRecurse();
6060   CodeGenFunction CGF(CGM);
6061   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6062   Address PrivateAddr = CGF.EmitLoadOfPointer(
6063       CGF.GetAddrOfLocalVar(&Param),
6064       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6065   llvm::Value *Size = nullptr;
6066   // If the size of the reduction item is non-constant, load it from global
6067   // threadprivate variable.
6068   if (RCG.getSizes(N).second) {
6069     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6070         CGF, CGM.getContext().getSizeType(),
6071         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6072     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6073                                 CGM.getContext().getSizeType(), Loc);
6074   }
6075   RCG.emitAggregateType(CGF, N, Size);
6076   // Emit the finalizer body:
6077   // <destroy>(<type>* %0)
6078   RCG.emitCleanups(CGF, N, PrivateAddr);
6079   CGF.FinishFunction(Loc);
6080   return Fn;
6081 }
6082 
/// Emits the initialization of task reductions: fills a local array with one
/// kmp_taskred_input_t descriptor per entry of Data.ReductionVars and passes
/// that array to the runtime.
///
/// \param CGF      Function being emitted.
/// \param Loc      Source location used for the emitted code.
/// \param LHSExprs Per-item LHS expressions, forwarded to the combiner
///                 function emitter (indexed like Data.ReductionVars).
/// \param RHSExprs Per-item RHS expressions, forwarded to the combiner
///                 function emitter (indexed like Data.ReductionVars).
/// \param Data     Reduction variables, originals, private copies and
///                 reduction operations, plus the task-modifier flags.
/// \returns the result of the __kmpc_taskred_modifier_init call when
///          Data.IsReductionWithTaskMod is set, of the __kmpc_taskred_init
///          call otherwise, or nullptr when there is no insertion point or no
///          reduction variables.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  // The fields are added in exactly this order; flags is an unsigned 32-bit
  // integer.
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  // One descriptor per reduction variable.
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_taskred_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    // A non-null SizeVal is what selects delayed creation for this item.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    // ElemLVal.reduce_size = size;
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    // emitReduceFiniFunction returns nullptr when the item needs no cleanups;
    // a null pointer is stored in that case.
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6211 
6212 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6213                                             SourceLocation Loc,
6214                                             bool IsWorksharingReduction) {
6215   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6216   // is_ws, int num, void *data);
6217   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6218   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6219                                                 CGM.IntTy, /*isSigned=*/true);
6220   llvm::Value *Args[] = {IdentTLoc, GTid,
6221                          llvm::ConstantInt::get(CGM.IntTy,
6222                                                 IsWorksharingReduction ? 1 : 0,
6223                                                 /*isSigned=*/true)};
6224   (void)CGF.EmitRuntimeCall(
6225       OMPBuilder.getOrCreateRuntimeFunction(
6226           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6227       Args);
6228 }
6229 
6230 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6231                                               SourceLocation Loc,
6232                                               ReductionCodeGen &RCG,
6233                                               unsigned N) {
6234   auto Sizes = RCG.getSizes(N);
6235   // Emit threadprivate global variable if the type is non-constant
6236   // (Sizes.second = nullptr).
6237   if (Sizes.second) {
6238     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6239                                                      /*isSigned=*/false);
6240     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6241         CGF, CGM.getContext().getSizeType(),
6242         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6243     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6244   }
6245 }
6246 
6247 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6248                                               SourceLocation Loc,
6249                                               llvm::Value *ReductionsPtr,
6250                                               LValue SharedLVal) {
6251   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6252   // *d);
6253   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6254                                                    CGM.IntTy,
6255                                                    /*isSigned=*/true),
6256                          ReductionsPtr,
6257                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6258                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6259   return Address(
6260       CGF.EmitRuntimeCall(
6261           OMPBuilder.getOrCreateRuntimeFunction(
6262               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6263           Args),
6264       SharedLVal.getAlignment());
6265 }
6266 
6267 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6268                                        SourceLocation Loc) {
6269   if (!CGF.HaveInsertPoint())
6270     return;
6271 
6272   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6273     OMPBuilder.createTaskwait(CGF.Builder);
6274   } else {
6275     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6276     // global_tid);
6277     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6278     // Ignore return result until untied tasks are supported.
6279     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6280                             CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6281                         Args);
6282   }
6283 
6284   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6285     Region->emitUntiedSwitch(CGF);
6286 }
6287 
6288 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6289                                            OpenMPDirectiveKind InnerKind,
6290                                            const RegionCodeGenTy &CodeGen,
6291                                            bool HasCancel) {
6292   if (!CGF.HaveInsertPoint())
6293     return;
6294   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6295                                  InnerKind != OMPD_critical &&
6296                                      InnerKind != OMPD_master &&
6297                                      InnerKind != OMPD_masked);
6298   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6299 }
6300 
namespace {
/// Cancellation kind codes. The value produced by getCancellationKind is
/// passed as the 32-bit cncl_kind argument of the __kmpc_cancel and
/// __kmpc_cancellationpoint runtime calls, so the numeric values matter.
enum RTCancelKind {
  /// Zero value; getCancellationKind does not map any cancel region to it.
  CancelNoreq = 0,
  /// Used for the 'parallel' cancel region.
  CancelParallel = 1,
  /// Used for the 'for' cancel region.
  CancelLoop = 2,
  /// Used for the 'sections' cancel region.
  CancelSections = 3,
  /// Used for the 'taskgroup' cancel region.
  CancelTaskgroup = 4
};
} // anonymous namespace
6310 
6311 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6312   RTCancelKind CancelKind = CancelNoreq;
6313   if (CancelRegion == OMPD_parallel)
6314     CancelKind = CancelParallel;
6315   else if (CancelRegion == OMPD_for)
6316     CancelKind = CancelLoop;
6317   else if (CancelRegion == OMPD_sections)
6318     CancelKind = CancelSections;
6319   else {
6320     assert(CancelRegion == OMPD_taskgroup);
6321     CancelKind = CancelTaskgroup;
6322   }
6323   return CancelKind;
6324 }
6325 
6326 void CGOpenMPRuntime::emitCancellationPointCall(
6327     CodeGenFunction &CGF, SourceLocation Loc,
6328     OpenMPDirectiveKind CancelRegion) {
6329   if (!CGF.HaveInsertPoint())
6330     return;
6331   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6332   // global_tid, kmp_int32 cncl_kind);
6333   if (auto *OMPRegionInfo =
6334           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6335     // For 'cancellation point taskgroup', the task region info may not have a
6336     // cancel. This may instead happen in another adjacent task.
6337     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6338       llvm::Value *Args[] = {
6339           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6340           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6341       // Ignore return result until untied tasks are supported.
6342       llvm::Value *Result = CGF.EmitRuntimeCall(
6343           OMPBuilder.getOrCreateRuntimeFunction(
6344               CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6345           Args);
6346       // if (__kmpc_cancellationpoint()) {
6347       //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6348       //   exit from construct;
6349       // }
6350       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6351       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6352       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6353       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6354       CGF.EmitBlock(ExitBB);
6355       if (CancelRegion == OMPD_parallel)
6356         emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6357       // exit from construct;
6358       CodeGenFunction::JumpDest CancelDest =
6359           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6360       CGF.EmitBranchThroughCleanup(CancelDest);
6361       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6362     }
6363   }
6364 }
6365 
6366 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6367                                      const Expr *IfCond,
6368                                      OpenMPDirectiveKind CancelRegion) {
6369   if (!CGF.HaveInsertPoint())
6370     return;
6371   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6372   // kmp_int32 cncl_kind);
6373   auto &M = CGM.getModule();
6374   if (auto *OMPRegionInfo =
6375           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6376     auto &&ThenGen = [this, &M, Loc, CancelRegion,
6377                       OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6378       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6379       llvm::Value *Args[] = {
6380           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6381           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6382       // Ignore return result until untied tasks are supported.
6383       llvm::Value *Result = CGF.EmitRuntimeCall(
6384           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6385       // if (__kmpc_cancel()) {
6386       //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6387       //   exit from construct;
6388       // }
6389       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6390       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6391       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6392       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6393       CGF.EmitBlock(ExitBB);
6394       if (CancelRegion == OMPD_parallel)
6395         RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6396       // exit from construct;
6397       CodeGenFunction::JumpDest CancelDest =
6398           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6399       CGF.EmitBranchThroughCleanup(CancelDest);
6400       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6401     };
6402     if (IfCond) {
6403       emitIfClause(CGF, IfCond, ThenGen,
6404                    [](CodeGenFunction &, PrePostActionTy &) {});
6405     } else {
6406       RegionCodeGenTy ThenRCG(ThenGen);
6407       ThenRCG(CGF);
6408     }
6409   }
6410 }
6411 
6412 namespace {
6413 /// Cleanup action for uses_allocators support.
6414 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6415   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6416 
6417 public:
6418   OMPUsesAllocatorsActionTy(
6419       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6420       : Allocators(Allocators) {}
6421   void Enter(CodeGenFunction &CGF) override {
6422     if (!CGF.HaveInsertPoint())
6423       return;
6424     for (const auto &AllocatorData : Allocators) {
6425       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6426           CGF, AllocatorData.first, AllocatorData.second);
6427     }
6428   }
6429   void Exit(CodeGenFunction &CGF) override {
6430     if (!CGF.HaveInsertPoint())
6431       return;
6432     for (const auto &AllocatorData : Allocators) {
6433       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6434                                                         AllocatorData.first);
6435     }
6436   }
6437 };
6438 } // namespace
6439 
6440 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6441     const OMPExecutableDirective &D, StringRef ParentName,
6442     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6443     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6444   assert(!ParentName.empty() && "Invalid target region parent name!");
6445   HasEmittedTargetRegion = true;
6446   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6447   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6448     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6449       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6450       if (!D.AllocatorTraits)
6451         continue;
6452       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6453     }
6454   }
6455   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6456   CodeGen.setAction(UsesAllocatorAction);
6457   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6458                                    IsOffloadEntry, CodeGen);
6459 }
6460 
6461 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6462                                              const Expr *Allocator,
6463                                              const Expr *AllocatorTraits) {
6464   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6465   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6466   // Use default memspace handle.
6467   llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6468   llvm::Value *NumTraits = llvm::ConstantInt::get(
6469       CGF.IntTy, cast<ConstantArrayType>(
6470                      AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6471                      ->getSize()
6472                      .getLimitedValue());
6473   LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6474   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6475       AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
6476   AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6477                                            AllocatorTraitsLVal.getBaseInfo(),
6478                                            AllocatorTraitsLVal.getTBAAInfo());
6479   llvm::Value *Traits =
6480       CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());
6481 
6482   llvm::Value *AllocatorVal =
6483       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6484                               CGM.getModule(), OMPRTL___kmpc_init_allocator),
6485                           {ThreadId, MemSpaceHandle, NumTraits, Traits});
6486   // Store to allocator.
6487   CGF.EmitVarDecl(*cast<VarDecl>(
6488       cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6489   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6490   AllocatorVal =
6491       CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6492                                Allocator->getType(), Allocator->getExprLoc());
6493   CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6494 }
6495 
6496 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6497                                              const Expr *Allocator) {
6498   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6499   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6500   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6501   llvm::Value *AllocatorVal =
6502       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6503   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6504                                           CGF.getContext().VoidPtrTy,
6505                                           Allocator->getExprLoc());
6506   (void)CGF.EmitRuntimeCall(
6507       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6508                                             OMPRTL___kmpc_destroy_allocator),
6509       {ThreadId, AllocatorVal});
6510 }
6511 
/// Generates the outlined function for the target region of \p D and, when it
/// is an offload entry, creates its region ID and registers the entry.
///
/// \param D              Target directive whose OMPD_target captured statement
///                       is outlined.
/// \param ParentName     Name of the enclosing function; becomes part of the
///                       entry name.
/// \param OutlinedFn     [out] Receives the generated function.
/// \param OutlinedFnID   [out] Receives the region ID; only written when
///                       \p IsOffloadEntry is true.
/// \param IsOffloadEntry When false, the function is generated but nothing is
///                       registered.
/// \param CodeGen        Code generation callback for the region body.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    // The device and file IDs are printed in hexadecimal, the line in decimal.
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // The outlined function is emitted with its own CodeGenFunction, using a
  // target region info that carries the entry name.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also give
  // the outlined function weak, non-DSO-local linkage in case we are emitting
  // code for the device, because these functions will be entry points to the
  // device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    // On AMDGCN targets the entry uses the AMDGPU kernel calling convention.
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // On the host the ID is a separate weak, constant, zero-initialized i8
    // global named after the entry function.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);

  // Add NumTeams and ThreadLimit attributes to the outlined function. Each
  // attribute is only added when the corresponding default value comes back
  // positive; the expressions returned by the queries are not used here.
  int32_t DefaultValTeams = -1;
  getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
  if (DefaultValTeams > 0) {
    OutlinedFn->addFnAttr("omp_target_num_teams",
                          std::to_string(DefaultValTeams));
  }
  int32_t DefaultValThreads = -1;
  getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
  if (DefaultValThreads > 0) {
    OutlinedFn->addFnAttr("omp_target_thread_limit",
                          std::to_string(DefaultValThreads));
  }
}
6594 
6595 /// Checks if the expression is constant or does not have non-trivial function
6596 /// calls.
6597 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6598   // We can skip constant expressions.
6599   // We can skip expressions with trivial calls or simple expressions.
6600   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6601           !E->hasNonTrivialCall(Ctx)) &&
6602          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6603 }
6604 
6605 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6606                                                     const Stmt *Body) {
6607   const Stmt *Child = Body->IgnoreContainers();
6608   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6609     Child = nullptr;
6610     for (const Stmt *S : C->body()) {
6611       if (const auto *E = dyn_cast<Expr>(S)) {
6612         if (isTrivial(Ctx, E))
6613           continue;
6614       }
6615       // Some of the statements can be ignored.
6616       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6617           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6618         continue;
6619       // Analyze declarations.
6620       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6621         if (llvm::all_of(DS->decls(), [](const Decl *D) {
6622               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6623                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6624                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6625                   isa<UsingDirectiveDecl>(D) ||
6626                   isa<OMPDeclareReductionDecl>(D) ||
6627                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6628                 return true;
6629               const auto *VD = dyn_cast<VarDecl>(D);
6630               if (!VD)
6631                 return false;
6632               return VD->hasGlobalStorage() || !VD->isUsed();
6633             }))
6634           continue;
6635       }
6636       // Found multiple children - cannot get the one child only.
6637       if (Child)
6638         return nullptr;
6639       Child = S;
6640     }
6641     if (Child)
6642       Child = Child->IgnoreContainers();
6643   }
6644   return Child;
6645 }
6646 
6647 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6648     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6649     int32_t &DefaultVal) {
6650 
6651   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6652   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6653          "Expected target-based executable directive.");
6654   switch (DirectiveKind) {
6655   case OMPD_target: {
6656     const auto *CS = D.getInnermostCapturedStmt();
6657     const auto *Body =
6658         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6659     const Stmt *ChildStmt =
6660         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6661     if (const auto *NestedDir =
6662             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6663       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6664         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6665           const Expr *NumTeams =
6666               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6667           if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6668             if (auto Constant =
6669                     NumTeams->getIntegerConstantExpr(CGF.getContext()))
6670               DefaultVal = Constant->getExtValue();
6671           return NumTeams;
6672         }
6673         DefaultVal = 0;
6674         return nullptr;
6675       }
6676       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6677           isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
6678         DefaultVal = 1;
6679         return nullptr;
6680       }
6681       DefaultVal = 1;
6682       return nullptr;
6683     }
6684     // A value of -1 is used to check if we need to emit no teams region
6685     DefaultVal = -1;
6686     return nullptr;
6687   }
6688   case OMPD_target_teams:
6689   case OMPD_target_teams_distribute:
6690   case OMPD_target_teams_distribute_simd:
6691   case OMPD_target_teams_distribute_parallel_for:
6692   case OMPD_target_teams_distribute_parallel_for_simd: {
6693     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6694       const Expr *NumTeams =
6695           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6696       if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6697         if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6698           DefaultVal = Constant->getExtValue();
6699       return NumTeams;
6700     }
6701     DefaultVal = 0;
6702     return nullptr;
6703   }
6704   case OMPD_target_parallel:
6705   case OMPD_target_parallel_for:
6706   case OMPD_target_parallel_for_simd:
6707   case OMPD_target_simd:
6708     DefaultVal = 1;
6709     return nullptr;
6710   case OMPD_parallel:
6711   case OMPD_for:
6712   case OMPD_parallel_for:
6713   case OMPD_parallel_master:
6714   case OMPD_parallel_sections:
6715   case OMPD_for_simd:
6716   case OMPD_parallel_for_simd:
6717   case OMPD_cancel:
6718   case OMPD_cancellation_point:
6719   case OMPD_ordered:
6720   case OMPD_threadprivate:
6721   case OMPD_allocate:
6722   case OMPD_task:
6723   case OMPD_simd:
6724   case OMPD_tile:
6725   case OMPD_unroll:
6726   case OMPD_sections:
6727   case OMPD_section:
6728   case OMPD_single:
6729   case OMPD_master:
6730   case OMPD_critical:
6731   case OMPD_taskyield:
6732   case OMPD_barrier:
6733   case OMPD_taskwait:
6734   case OMPD_taskgroup:
6735   case OMPD_atomic:
6736   case OMPD_flush:
6737   case OMPD_depobj:
6738   case OMPD_scan:
6739   case OMPD_teams:
6740   case OMPD_target_data:
6741   case OMPD_target_exit_data:
6742   case OMPD_target_enter_data:
6743   case OMPD_distribute:
6744   case OMPD_distribute_simd:
6745   case OMPD_distribute_parallel_for:
6746   case OMPD_distribute_parallel_for_simd:
6747   case OMPD_teams_distribute:
6748   case OMPD_teams_distribute_simd:
6749   case OMPD_teams_distribute_parallel_for:
6750   case OMPD_teams_distribute_parallel_for_simd:
6751   case OMPD_target_update:
6752   case OMPD_declare_simd:
6753   case OMPD_declare_variant:
6754   case OMPD_begin_declare_variant:
6755   case OMPD_end_declare_variant:
6756   case OMPD_declare_target:
6757   case OMPD_end_declare_target:
6758   case OMPD_declare_reduction:
6759   case OMPD_declare_mapper:
6760   case OMPD_taskloop:
6761   case OMPD_taskloop_simd:
6762   case OMPD_master_taskloop:
6763   case OMPD_master_taskloop_simd:
6764   case OMPD_parallel_master_taskloop:
6765   case OMPD_parallel_master_taskloop_simd:
6766   case OMPD_requires:
6767   case OMPD_metadirective:
6768   case OMPD_unknown:
6769     break;
6770   default:
6771     break;
6772   }
6773   llvm_unreachable("Unexpected directive kind.");
6774 }
6775 
6776 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6777     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6778   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6779          "Clauses associated with the teams directive expected to be emitted "
6780          "only for the host!");
6781   CGBuilderTy &Bld = CGF.Builder;
6782   int32_t DefaultNT = -1;
6783   const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
6784   if (NumTeams != nullptr) {
6785     OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6786 
6787     switch (DirectiveKind) {
6788     case OMPD_target: {
6789       const auto *CS = D.getInnermostCapturedStmt();
6790       CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6791       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6792       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6793                                                   /*IgnoreResultAssign*/ true);
6794       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6795                              /*isSigned=*/true);
6796     }
6797     case OMPD_target_teams:
6798     case OMPD_target_teams_distribute:
6799     case OMPD_target_teams_distribute_simd:
6800     case OMPD_target_teams_distribute_parallel_for:
6801     case OMPD_target_teams_distribute_parallel_for_simd: {
6802       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6803       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6804                                                   /*IgnoreResultAssign*/ true);
6805       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6806                              /*isSigned=*/true);
6807     }
6808     default:
6809       break;
6810     }
6811   } else if (DefaultNT == -1) {
6812     return nullptr;
6813   }
6814 
6815   return Bld.getInt32(DefaultNT);
6816 }
6817 
6818 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6819                                   llvm::Value *DefaultThreadLimitVal) {
6820   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6821       CGF.getContext(), CS->getCapturedStmt());
6822   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6823     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6824       llvm::Value *NumThreads = nullptr;
6825       llvm::Value *CondVal = nullptr;
6826       // Handle if clause. If if clause present, the number of threads is
6827       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6828       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6829         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6830         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6831         const OMPIfClause *IfClause = nullptr;
6832         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6833           if (C->getNameModifier() == OMPD_unknown ||
6834               C->getNameModifier() == OMPD_parallel) {
6835             IfClause = C;
6836             break;
6837           }
6838         }
6839         if (IfClause) {
6840           const Expr *Cond = IfClause->getCondition();
6841           bool Result;
6842           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6843             if (!Result)
6844               return CGF.Builder.getInt32(1);
6845           } else {
6846             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6847             if (const auto *PreInit =
6848                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6849               for (const auto *I : PreInit->decls()) {
6850                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6851                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6852                 } else {
6853                   CodeGenFunction::AutoVarEmission Emission =
6854                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6855                   CGF.EmitAutoVarCleanups(Emission);
6856                 }
6857               }
6858             }
6859             CondVal = CGF.EvaluateExprAsBool(Cond);
6860           }
6861         }
6862       }
6863       // Check the value of num_threads clause iff if clause was not specified
6864       // or is not evaluated to false.
6865       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6866         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6867         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6868         const auto *NumThreadsClause =
6869             Dir->getSingleClause<OMPNumThreadsClause>();
6870         CodeGenFunction::LexicalScope Scope(
6871             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6872         if (const auto *PreInit =
6873                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6874           for (const auto *I : PreInit->decls()) {
6875             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6876               CGF.EmitVarDecl(cast<VarDecl>(*I));
6877             } else {
6878               CodeGenFunction::AutoVarEmission Emission =
6879                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6880               CGF.EmitAutoVarCleanups(Emission);
6881             }
6882           }
6883         }
6884         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6885         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6886                                                /*isSigned=*/false);
6887         if (DefaultThreadLimitVal)
6888           NumThreads = CGF.Builder.CreateSelect(
6889               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6890               DefaultThreadLimitVal, NumThreads);
6891       } else {
6892         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6893                                            : CGF.Builder.getInt32(0);
6894       }
6895       // Process condition of the if clause.
6896       if (CondVal) {
6897         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6898                                               CGF.Builder.getInt32(1));
6899       }
6900       return NumThreads;
6901     }
6902     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6903       return CGF.Builder.getInt32(1);
6904     return DefaultThreadLimitVal;
6905   }
6906   return DefaultThreadLimitVal ? DefaultThreadLimitVal
6907                                : CGF.Builder.getInt32(0);
6908 }
6909 
6910 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6911     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6912     int32_t &DefaultVal) {
6913   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6914   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6915          "Expected target-based executable directive.");
6916 
6917   switch (DirectiveKind) {
6918   case OMPD_target:
6919     // Teams have no clause thread_limit
6920     return nullptr;
6921   case OMPD_target_teams:
6922   case OMPD_target_teams_distribute:
6923     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6924       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6925       const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
6926       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6927         if (auto Constant =
6928                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6929           DefaultVal = Constant->getExtValue();
6930       return ThreadLimit;
6931     }
6932     return nullptr;
6933   case OMPD_target_parallel:
6934   case OMPD_target_parallel_for:
6935   case OMPD_target_parallel_for_simd:
6936   case OMPD_target_teams_distribute_parallel_for:
6937   case OMPD_target_teams_distribute_parallel_for_simd: {
6938     Expr *ThreadLimit = nullptr;
6939     Expr *NumThreads = nullptr;
6940     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6941       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6942       ThreadLimit = ThreadLimitClause->getThreadLimit();
6943       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6944         if (auto Constant =
6945                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6946           DefaultVal = Constant->getExtValue();
6947     }
6948     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6949       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6950       NumThreads = NumThreadsClause->getNumThreads();
6951       if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
6952         if (auto Constant =
6953                 NumThreads->getIntegerConstantExpr(CGF.getContext())) {
6954           if (Constant->getExtValue() < DefaultVal) {
6955             DefaultVal = Constant->getExtValue();
6956             ThreadLimit = NumThreads;
6957           }
6958         }
6959       }
6960     }
6961     return ThreadLimit;
6962   }
6963   case OMPD_target_teams_distribute_simd:
6964   case OMPD_target_simd:
6965     DefaultVal = 1;
6966     return nullptr;
6967   case OMPD_parallel:
6968   case OMPD_for:
6969   case OMPD_parallel_for:
6970   case OMPD_parallel_master:
6971   case OMPD_parallel_sections:
6972   case OMPD_for_simd:
6973   case OMPD_parallel_for_simd:
6974   case OMPD_cancel:
6975   case OMPD_cancellation_point:
6976   case OMPD_ordered:
6977   case OMPD_threadprivate:
6978   case OMPD_allocate:
6979   case OMPD_task:
6980   case OMPD_simd:
6981   case OMPD_tile:
6982   case OMPD_unroll:
6983   case OMPD_sections:
6984   case OMPD_section:
6985   case OMPD_single:
6986   case OMPD_master:
6987   case OMPD_critical:
6988   case OMPD_taskyield:
6989   case OMPD_barrier:
6990   case OMPD_taskwait:
6991   case OMPD_taskgroup:
6992   case OMPD_atomic:
6993   case OMPD_flush:
6994   case OMPD_depobj:
6995   case OMPD_scan:
6996   case OMPD_teams:
6997   case OMPD_target_data:
6998   case OMPD_target_exit_data:
6999   case OMPD_target_enter_data:
7000   case OMPD_distribute:
7001   case OMPD_distribute_simd:
7002   case OMPD_distribute_parallel_for:
7003   case OMPD_distribute_parallel_for_simd:
7004   case OMPD_teams_distribute:
7005   case OMPD_teams_distribute_simd:
7006   case OMPD_teams_distribute_parallel_for:
7007   case OMPD_teams_distribute_parallel_for_simd:
7008   case OMPD_target_update:
7009   case OMPD_declare_simd:
7010   case OMPD_declare_variant:
7011   case OMPD_begin_declare_variant:
7012   case OMPD_end_declare_variant:
7013   case OMPD_declare_target:
7014   case OMPD_end_declare_target:
7015   case OMPD_declare_reduction:
7016   case OMPD_declare_mapper:
7017   case OMPD_taskloop:
7018   case OMPD_taskloop_simd:
7019   case OMPD_master_taskloop:
7020   case OMPD_master_taskloop_simd:
7021   case OMPD_parallel_master_taskloop:
7022   case OMPD_parallel_master_taskloop_simd:
7023   case OMPD_requires:
7024   case OMPD_unknown:
7025     break;
7026   default:
7027     break;
7028   }
7029   llvm_unreachable("Unsupported directive kind.");
7030 }
7031 
7032 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
7033     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
7034   assert(!CGF.getLangOpts().OpenMPIsDevice &&
7035          "Clauses associated with the teams directive expected to be emitted "
7036          "only for the host!");
7037   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
7038   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
7039          "Expected target-based executable directive.");
7040   CGBuilderTy &Bld = CGF.Builder;
7041   llvm::Value *ThreadLimitVal = nullptr;
7042   llvm::Value *NumThreadsVal = nullptr;
7043   switch (DirectiveKind) {
7044   case OMPD_target: {
7045     const CapturedStmt *CS = D.getInnermostCapturedStmt();
7046     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7047       return NumThreads;
7048     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7049         CGF.getContext(), CS->getCapturedStmt());
7050     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
7051       if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
7052         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
7053         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
7054         const auto *ThreadLimitClause =
7055             Dir->getSingleClause<OMPThreadLimitClause>();
7056         CodeGenFunction::LexicalScope Scope(
7057             CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
7058         if (const auto *PreInit =
7059                 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
7060           for (const auto *I : PreInit->decls()) {
7061             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
7062               CGF.EmitVarDecl(cast<VarDecl>(*I));
7063             } else {
7064               CodeGenFunction::AutoVarEmission Emission =
7065                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
7066               CGF.EmitAutoVarCleanups(Emission);
7067             }
7068           }
7069         }
7070         llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7071             ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7072         ThreadLimitVal =
7073             Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7074       }
7075       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
7076           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
7077         CS = Dir->getInnermostCapturedStmt();
7078         const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7079             CGF.getContext(), CS->getCapturedStmt());
7080         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
7081       }
7082       if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
7083           !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
7084         CS = Dir->getInnermostCapturedStmt();
7085         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7086           return NumThreads;
7087       }
7088       if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
7089         return Bld.getInt32(1);
7090     }
7091     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
7092   }
7093   case OMPD_target_teams: {
7094     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7095       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7096       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7097       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7098           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7099       ThreadLimitVal =
7100           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7101     }
7102     const CapturedStmt *CS = D.getInnermostCapturedStmt();
7103     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7104       return NumThreads;
7105     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7106         CGF.getContext(), CS->getCapturedStmt());
7107     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
7108       if (Dir->getDirectiveKind() == OMPD_distribute) {
7109         CS = Dir->getInnermostCapturedStmt();
7110         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7111           return NumThreads;
7112       }
7113     }
7114     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
7115   }
7116   case OMPD_target_teams_distribute:
7117     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7118       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7119       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7120       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7121           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7122       ThreadLimitVal =
7123           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7124     }
7125     return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
7126   case OMPD_target_parallel:
7127   case OMPD_target_parallel_for:
7128   case OMPD_target_parallel_for_simd:
7129   case OMPD_target_teams_distribute_parallel_for:
7130   case OMPD_target_teams_distribute_parallel_for_simd: {
7131     llvm::Value *CondVal = nullptr;
7132     // Handle if clause. If if clause present, the number of threads is
7133     // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
7134     if (D.hasClausesOfKind<OMPIfClause>()) {
7135       const OMPIfClause *IfClause = nullptr;
7136       for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
7137         if (C->getNameModifier() == OMPD_unknown ||
7138             C->getNameModifier() == OMPD_parallel) {
7139           IfClause = C;
7140           break;
7141         }
7142       }
7143       if (IfClause) {
7144         const Expr *Cond = IfClause->getCondition();
7145         bool Result;
7146         if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
7147           if (!Result)
7148             return Bld.getInt32(1);
7149         } else {
7150           CodeGenFunction::RunCleanupsScope Scope(CGF);
7151           CondVal = CGF.EvaluateExprAsBool(Cond);
7152         }
7153       }
7154     }
7155     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7156       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7157       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7158       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7159           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7160       ThreadLimitVal =
7161           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7162     }
7163     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
7164       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
7165       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
7166       llvm::Value *NumThreads = CGF.EmitScalarExpr(
7167           NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
7168       NumThreadsVal =
7169           Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
7170       ThreadLimitVal = ThreadLimitVal
7171                            ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
7172                                                                 ThreadLimitVal),
7173                                               NumThreadsVal, ThreadLimitVal)
7174                            : NumThreadsVal;
7175     }
7176     if (!ThreadLimitVal)
7177       ThreadLimitVal = Bld.getInt32(0);
7178     if (CondVal)
7179       return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
7180     return ThreadLimitVal;
7181   }
7182   case OMPD_target_teams_distribute_simd:
7183   case OMPD_target_simd:
7184     return Bld.getInt32(1);
7185   case OMPD_parallel:
7186   case OMPD_for:
7187   case OMPD_parallel_for:
7188   case OMPD_parallel_master:
7189   case OMPD_parallel_sections:
7190   case OMPD_for_simd:
7191   case OMPD_parallel_for_simd:
7192   case OMPD_cancel:
7193   case OMPD_cancellation_point:
7194   case OMPD_ordered:
7195   case OMPD_threadprivate:
7196   case OMPD_allocate:
7197   case OMPD_task:
7198   case OMPD_simd:
7199   case OMPD_tile:
7200   case OMPD_unroll:
7201   case OMPD_sections:
7202   case OMPD_section:
7203   case OMPD_single:
7204   case OMPD_master:
7205   case OMPD_critical:
7206   case OMPD_taskyield:
7207   case OMPD_barrier:
7208   case OMPD_taskwait:
7209   case OMPD_taskgroup:
7210   case OMPD_atomic:
7211   case OMPD_flush:
7212   case OMPD_depobj:
7213   case OMPD_scan:
7214   case OMPD_teams:
7215   case OMPD_target_data:
7216   case OMPD_target_exit_data:
7217   case OMPD_target_enter_data:
7218   case OMPD_distribute:
7219   case OMPD_distribute_simd:
7220   case OMPD_distribute_parallel_for:
7221   case OMPD_distribute_parallel_for_simd:
7222   case OMPD_teams_distribute:
7223   case OMPD_teams_distribute_simd:
7224   case OMPD_teams_distribute_parallel_for:
7225   case OMPD_teams_distribute_parallel_for_simd:
7226   case OMPD_target_update:
7227   case OMPD_declare_simd:
7228   case OMPD_declare_variant:
7229   case OMPD_begin_declare_variant:
7230   case OMPD_end_declare_variant:
7231   case OMPD_declare_target:
7232   case OMPD_end_declare_target:
7233   case OMPD_declare_reduction:
7234   case OMPD_declare_mapper:
7235   case OMPD_taskloop:
7236   case OMPD_taskloop_simd:
7237   case OMPD_master_taskloop:
7238   case OMPD_master_taskloop_simd:
7239   case OMPD_parallel_master_taskloop:
7240   case OMPD_parallel_master_taskloop_simd:
7241   case OMPD_requires:
7242   case OMPD_metadirective:
7243   case OMPD_unknown:
7244     break;
7245   default:
7246     break;
7247   }
7248   llvm_unreachable("Unsupported directive kind.");
7249 }
7250 
7251 namespace {
7252 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7253 
7254 // Utility to handle information from clauses associated with a given
7255 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7256 // It provides a convenient interface to obtain the information and generate
7257 // code for that information.
7258 class MappableExprsHandler {
7259 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. The flags are combined as a 64-bit mask; the enum is marked
  /// as an LLVM bitmask enum so the bitwise operators apply to it directly.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    // Note: 0x800 is not used here; it is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// Increment and decrement a separate reference counter so that the data
    /// cannot be unmapped within the associated region.  Thus, this flag is
    /// intended to be used on 'target' and 'target data' directives because they
    /// are inherently structured.  It is not intended to be used on 'target
    /// enter data' and 'target exit data' directives because they are inherently
    /// dynamic.
    /// This is an OpenMP extension for the sake of OpenACC support.
    OMP_MAP_OMPX_HOLD = 0x2000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7315 
7316   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7317   static unsigned getFlagMemberOffset() {
7318     unsigned Offset = 0;
7319     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7320          Remain = Remain >> 1)
7321       Offset++;
7322     return Offset;
7323   }
7324 
7325   /// Class that holds debugging information for a data mapping to be passed to
7326   /// the runtime library.
7327   class MappingExprInfo {
7328     /// The variable declaration used for the data mapping.
7329     const ValueDecl *MapDecl = nullptr;
7330     /// The original expression used in the map clause, or null if there is
7331     /// none.
7332     const Expr *MapExpr = nullptr;
7333 
7334   public:
7335     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7336         : MapDecl(MapDecl), MapExpr(MapExpr) {}
7337 
7338     const ValueDecl *getMapDecl() const { return MapDecl; }
7339     const Expr *getMapExpr() const { return MapExpr; }
7340   };
7341 
7342   /// Class that associates information with a base pointer to be passed to the
7343   /// runtime library.
7344   class BasePointerInfo {
7345     /// The base pointer.
7346     llvm::Value *Ptr = nullptr;
7347     /// The base declaration that refers to this device pointer, or null if
7348     /// there is none.
7349     const ValueDecl *DevPtrDecl = nullptr;
7350 
7351   public:
7352     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7353         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7354     llvm::Value *operator*() const { return Ptr; }
7355     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7356     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7357   };
7358 
  // Container types for the mapping data. They are the member types of
  // MapCombinedInfoTy and of its nested StructNonContiguousInfo; each one is
  // a SmallVector with an inline capacity of four elements.
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  // One MapValuesArrayTy per entry (an array of arrays of values).
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7366 
7367   /// This structure contains combined information generated for mappable
7368   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7369   /// mappers, and non-contiguous information.
7370   struct MapCombinedInfoTy {
7371     struct StructNonContiguousInfo {
7372       bool IsNonContiguous = false;
7373       MapDimArrayTy Dims;
7374       MapNonContiguousArrayTy Offsets;
7375       MapNonContiguousArrayTy Counts;
7376       MapNonContiguousArrayTy Strides;
7377     };
7378     MapExprsArrayTy Exprs;
7379     MapBaseValuesArrayTy BasePointers;
7380     MapValuesArrayTy Pointers;
7381     MapValuesArrayTy Sizes;
7382     MapFlagsArrayTy Types;
7383     MapMappersArrayTy Mappers;
7384     StructNonContiguousInfo NonContigInfo;
7385 
7386     /// Append arrays in \a CurInfo.
7387     void append(MapCombinedInfoTy &CurInfo) {
7388       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7389       BasePointers.append(CurInfo.BasePointers.begin(),
7390                           CurInfo.BasePointers.end());
7391       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7392       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7393       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7394       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7395       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7396                                  CurInfo.NonContigInfo.Dims.end());
7397       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7398                                     CurInfo.NonContigInfo.Offsets.end());
7399       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7400                                    CurInfo.NonContigInfo.Counts.end());
7401       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7402                                     CurInfo.NonContigInfo.Strides.end());
7403     }
7404   };
7405 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Map information kept alongside the range.
    /// NOTE(review): presumably the entries gathered for the struct members
    /// before the combined entry is emitted -- confirm against the users.
    MapCombinedInfoTy PreliminaryMapData;
    /// Lowest mapped element as (field index, address). Starts out as field
    /// index 0 with an invalid address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest mapped element as (field index, address). Starts out as field
    /// index 0 with an invalid address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Address of the base element; invalid until it has been set.
    Address Base = Address::invalid();
    /// NOTE(review): presumably a lower-bound address for the mapped range;
    /// invalid until set -- confirm against the code that assigns it.
    Address LB = Address::invalid();
    /// NOTE(review): presumably set when the struct is accessed through an
    /// array section -- confirm against the code that assigns it.
    bool IsArraySection = false;
    /// NOTE(review): presumably set when the whole record is mapped --
    /// confirm against the code that assigns it.
    bool HasCompleteRecord = false;
  };
7421 
7422 private:
7423   /// Kind that defines how a device pointer has to be returned.
7424   struct MapInfo {
7425     OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
7426     OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
7427     ArrayRef<OpenMPMapModifierKind> MapModifiers;
7428     ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
7429     bool ReturnDevicePointer = false;
7430     bool IsImplicit = false;
7431     const ValueDecl *Mapper = nullptr;
7432     const Expr *VarRef = nullptr;
7433     bool ForDeviceAddr = false;
7434 
7435     MapInfo() = default;
7436     MapInfo(
7437         OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7438         OpenMPMapClauseKind MapType,
7439         ArrayRef<OpenMPMapModifierKind> MapModifiers,
7440         ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7441         bool ReturnDevicePointer, bool IsImplicit,
7442         const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
7443         bool ForDeviceAddr = false)
7444         : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
7445           MotionModifiers(MotionModifiers),
7446           ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
7447           Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
7448   };
7449 
7450   /// If use_device_ptr or use_device_addr is used on a decl which is a struct
7451   /// member and there is no map information about it, then emission of that
7452   /// entry is deferred until the whole struct has been processed.
7453   struct DeferredDevicePtrEntryTy {
7454     const Expr *IE = nullptr;
7455     const ValueDecl *VD = nullptr;
7456     bool ForDeviceAddr = false;
7457 
7458     DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
7459                              bool ForDeviceAddr)
7460         : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
7461   };
7462 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// The bool value is true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between lambda declarations and a map clause.
  /// NOTE(review): presumably the clause that maps the lambda -- confirm where
  /// this map is populated.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7486 
7487   llvm::Value *getExprTypeSize(const Expr *E) const {
7488     QualType ExprTy = E->getType().getCanonicalType();
7489 
7490     // Calculate the size for array shaping expression.
7491     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7492       llvm::Value *Size =
7493           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7494       for (const Expr *SE : OAE->getDimensions()) {
7495         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7496         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7497                                       CGF.getContext().getSizeType(),
7498                                       SE->getExprLoc());
7499         Size = CGF.Builder.CreateNUWMul(Size, Sz);
7500       }
7501       return Size;
7502     }
7503 
7504     // Reference types are ignored for mapping purposes.
7505     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7506       ExprTy = RefTy->getPointeeType().getCanonicalType();
7507 
7508     // Given that an array section is considered a built-in type, we need to
7509     // do the calculation based on the length of the section instead of relying
7510     // on CGF.getTypeSize(E->getType()).
7511     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7512       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7513                             OAE->getBase()->IgnoreParenImpCasts())
7514                             .getCanonicalType();
7515 
7516       // If there is no length associated with the expression and lower bound is
7517       // not specified too, that means we are using the whole length of the
7518       // base.
7519       if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7520           !OAE->getLowerBound())
7521         return CGF.getTypeSize(BaseTy);
7522 
7523       llvm::Value *ElemSize;
7524       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7525         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7526       } else {
7527         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7528         assert(ATy && "Expecting array type if not a pointer type.");
7529         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7530       }
7531 
7532       // If we don't have a length at this point, that is because we have an
7533       // array section with a single element.
7534       if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7535         return ElemSize;
7536 
7537       if (const Expr *LenExpr = OAE->getLength()) {
7538         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7539         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7540                                              CGF.getContext().getSizeType(),
7541                                              LenExpr->getExprLoc());
7542         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7543       }
7544       assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7545              OAE->getLowerBound() && "expected array_section[lb:].");
7546       // Size = sizetype - lb * elemtype;
7547       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7548       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7549       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7550                                        CGF.getContext().getSizeType(),
7551                                        OAE->getLowerBound()->getExprLoc());
7552       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7553       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7554       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7555       LengthVal = CGF.Builder.CreateSelect(
7556           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7557       return LengthVal;
7558     }
7559     return CGF.getTypeSize(ExprTy);
7560   }
7561 
7562   /// Return the corresponding bits for a given map clause modifier. Add
7563   /// a flag marking the map as a pointer if requested. Add a flag marking the
7564   /// map as the first one of a series of maps that relate to the same map
7565   /// expression.
7566   OpenMPOffloadMappingFlags getMapTypeBits(
7567       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7568       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7569       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7570     OpenMPOffloadMappingFlags Bits =
7571         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7572     switch (MapType) {
7573     case OMPC_MAP_alloc:
7574     case OMPC_MAP_release:
7575       // alloc and release is the default behavior in the runtime library,  i.e.
7576       // if we don't pass any bits alloc/release that is what the runtime is
7577       // going to do. Therefore, we don't need to signal anything for these two
7578       // type modifiers.
7579       break;
7580     case OMPC_MAP_to:
7581       Bits |= OMP_MAP_TO;
7582       break;
7583     case OMPC_MAP_from:
7584       Bits |= OMP_MAP_FROM;
7585       break;
7586     case OMPC_MAP_tofrom:
7587       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7588       break;
7589     case OMPC_MAP_delete:
7590       Bits |= OMP_MAP_DELETE;
7591       break;
7592     case OMPC_MAP_unknown:
7593       llvm_unreachable("Unexpected map type!");
7594     }
7595     if (AddPtrFlag)
7596       Bits |= OMP_MAP_PTR_AND_OBJ;
7597     if (AddIsTargetParamFlag)
7598       Bits |= OMP_MAP_TARGET_PARAM;
7599     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7600       Bits |= OMP_MAP_ALWAYS;
7601     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7602       Bits |= OMP_MAP_CLOSE;
7603     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7604         llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7605       Bits |= OMP_MAP_PRESENT;
7606     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7607       Bits |= OMP_MAP_OMPX_HOLD;
7608     if (IsNonContiguous)
7609       Bits |= OMP_MAP_NON_CONTIG;
7610     return Bits;
7611   }
7612 
7613   /// Return true if the provided expression is a final array section. A
7614   /// final array section, is one whose length can't be proved to be one.
7615   bool isFinalArraySectionExpression(const Expr *E) const {
7616     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7617 
7618     // It is not an array section and therefore not a unity-size one.
7619     if (!OASE)
7620       return false;
7621 
7622     // An array section with no colon always refer to a single element.
7623     if (OASE->getColonLocFirst().isInvalid())
7624       return false;
7625 
7626     const Expr *Length = OASE->getLength();
7627 
7628     // If we don't have a length we have to check if the array has size 1
7629     // for this dimension. Also, we should always expect a length if the
7630     // base type is pointer.
7631     if (!Length) {
7632       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7633                              OASE->getBase()->IgnoreParenImpCasts())
7634                              .getCanonicalType();
7635       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7636         return ATy->getSize().getSExtValue() != 1;
7637       // If we don't have a constant dimension length, we have to consider
7638       // the current section as having any size, so it is not necessarily
7639       // unitary. If it happen to be unity size, that's user fault.
7640       return true;
7641     }
7642 
7643     // Check if the length evaluates to 1.
7644     Expr::EvalResult Result;
7645     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7646       return true; // Can have more that size 1.
7647 
7648     llvm::APSInt ConstLength = Result.Val.getInt();
7649     return ConstLength.getSExtValue() != 1;
7650   }
7651 
7652   /// Generate the base pointers, section pointers, sizes, map type bits, and
7653   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7654   /// map type, map or motion modifiers, and expression components.
7655   /// \a IsFirstComponent should be set to true if the provided set of
7656   /// components is the first associated with a capture.
7657   void generateInfoForComponentList(
7658       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7659       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7660       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7661       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7662       bool IsFirstComponentList, bool IsImplicit,
7663       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7664       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7665       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7666           OverlappedElements = llvm::None) const {
7667     // The following summarizes what has to be generated for each map and the
7668     // types below. The generated information is expressed in this order:
7669     // base pointer, section pointer, size, flags
7670     // (to add to the ones that come from the map type and modifier).
7671     //
7672     // double d;
7673     // int i[100];
7674     // float *p;
7675     //
7676     // struct S1 {
7677     //   int i;
7678     //   float f[50];
7679     // }
7680     // struct S2 {
7681     //   int i;
7682     //   float f[50];
7683     //   S1 s;
7684     //   double *p;
7685     //   struct S2 *ps;
7686     //   int &ref;
7687     // }
7688     // S2 s;
7689     // S2 *ps;
7690     //
7691     // map(d)
7692     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7693     //
7694     // map(i)
7695     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7696     //
7697     // map(i[1:23])
7698     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7699     //
7700     // map(p)
7701     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7702     //
7703     // map(p[1:24])
7704     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7705     // in unified shared memory mode or for local pointers
7706     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7707     //
7708     // map(s)
7709     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7710     //
7711     // map(s.i)
7712     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7713     //
7714     // map(s.s.f)
7715     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7716     //
7717     // map(s.p)
7718     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7719     //
7720     // map(to: s.p[:22])
7721     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7722     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7723     // &(s.p), &(s.p[0]), 22*sizeof(double),
7724     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7725     // (*) alloc space for struct members, only this is a target parameter
7726     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7727     //      optimizes this entry out, same in the examples below)
7728     // (***) map the pointee (map: to)
7729     //
7730     // map(to: s.ref)
7731     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7732     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7733     // (*) alloc space for struct members, only this is a target parameter
7734     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7735     //      optimizes this entry out, same in the examples below)
7736     // (***) map the pointee (map: to)
7737     //
7738     // map(s.ps)
7739     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7740     //
7741     // map(from: s.ps->s.i)
7742     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7743     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7744     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7745     //
7746     // map(to: s.ps->ps)
7747     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7748     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7749     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7750     //
7751     // map(s.ps->ps->ps)
7752     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7753     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7754     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7755     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7756     //
7757     // map(to: s.ps->ps->s.f[:22])
7758     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7759     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7760     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7761     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7762     //
7763     // map(ps)
7764     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7765     //
7766     // map(ps->i)
7767     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7768     //
7769     // map(ps->s.f)
7770     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7771     //
7772     // map(from: ps->p)
7773     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7774     //
7775     // map(to: ps->p[:22])
7776     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7777     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7778     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7779     //
7780     // map(ps->ps)
7781     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7782     //
7783     // map(from: ps->ps->s.i)
7784     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7785     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7786     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7787     //
7788     // map(from: ps->ps->ps)
7789     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7790     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7791     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7792     //
7793     // map(ps->ps->ps->ps)
7794     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7795     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7796     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7797     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7798     //
7799     // map(to: ps->ps->ps->s.f[:22])
7800     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7801     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7802     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7803     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7804     //
7805     // map(to: s.f[:22]) map(from: s.p[:33])
7806     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
7808     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7809     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7810     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7811     // (*) allocate contiguous space needed to fit all mapped members even if
7812     //     we allocate space for members not mapped (in this example,
7813     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7814     //     them as well because they fall between &s.f[0] and &s.p)
7815     //
7816     // map(from: s.f[:22]) map(to: ps->p[:33])
7817     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7818     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7819     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7820     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7821     // (*) the struct this entry pertains to is the 2nd element in the list of
7822     //     arguments, hence MEMBER_OF(2)
7823     //
7824     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7825     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7826     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7827     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7828     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7829     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7830     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7831     // (*) the struct this entry pertains to is the 4th element in the list
7832     //     of arguments, hence MEMBER_OF(4)
7833 
7834     // Track if the map information being generated is the first for a capture.
7835     bool IsCaptureFirstInfo = IsFirstComponentList;
7836     // When the variable is on a declare target link or in a to clause with
7837     // unified memory, a reference is needed to hold the host/device address
7838     // of the variable.
7839     bool RequiresReference = false;
7840 
7841     // Scan the components from the base to the complete expression.
7842     auto CI = Components.rbegin();
7843     auto CE = Components.rend();
7844     auto I = CI;
7845 
7846     // Track if the map information being generated is the first for a list of
7847     // components.
7848     bool IsExpressionFirstInfo = true;
7849     bool FirstPointerInComplexData = false;
7850     Address BP = Address::invalid();
7851     const Expr *AssocExpr = I->getAssociatedExpression();
7852     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7853     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7854     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7855 
7856     if (isa<MemberExpr>(AssocExpr)) {
7857       // The base is the 'this' pointer. The content of the pointer is going
7858       // to be the base of the field being mapped.
7859       BP = CGF.LoadCXXThisAddress();
7860     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7861                (OASE &&
7862                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7863       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7864     } else if (OAShE &&
7865                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7866       BP = Address(
7867           CGF.EmitScalarExpr(OAShE->getBase()),
7868           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7869     } else {
7870       // The base is the reference to the variable.
7871       // BP = &Var.
7872       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7873       if (const auto *VD =
7874               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7875         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7876                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7877           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7878               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7879                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7880             RequiresReference = true;
7881             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7882           }
7883         }
7884       }
7885 
7886       // If the variable is a pointer and is being dereferenced (i.e. is not
7887       // the last component), the base has to be the pointer itself, not its
7888       // reference. References are ignored for mapping purposes.
7889       QualType Ty =
7890           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7891       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7892         // No need to generate individual map information for the pointer, it
7893         // can be associated with the combined storage if shared memory mode is
7894         // active or the base declaration is not global variable.
7895         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7896         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7897             !VD || VD->hasLocalStorage())
7898           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7899         else
7900           FirstPointerInComplexData = true;
7901         ++I;
7902       }
7903     }
7904 
7905     // Track whether a component of the list should be marked as MEMBER_OF some
7906     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7907     // in a component list should be marked as MEMBER_OF, all subsequent entries
7908     // do not belong to the base struct. E.g.
7909     // struct S2 s;
7910     // s.ps->ps->ps->f[:]
7911     //   (1) (2) (3) (4)
7912     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7913     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7914     // is the pointee of ps(2) which is not member of struct s, so it should not
7915     // be marked as such (it is still PTR_AND_OBJ).
7916     // The variable is initialized to false so that PTR_AND_OBJ entries which
7917     // are not struct members are not considered (e.g. array of pointers to
7918     // data).
7919     bool ShouldBeMemberOf = false;
7920 
7921     // Variable keeping track of whether or not we have encountered a component
7922     // in the component list which is a member expression. Useful when we have a
7923     // pointer or a final array section, in which case it is the previous
7924     // component in the list which tells us whether we have a member expression.
7925     // E.g. X.f[:]
7926     // While processing the final array section "[:]" it is "f" which tells us
7927     // whether we are dealing with a member of a declared struct.
7928     const MemberExpr *EncounteredME = nullptr;
7929 
7930     // Track for the total number of dimension. Start from one for the dummy
7931     // dimension.
7932     uint64_t DimSize = 1;
7933 
7934     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7935     bool IsPrevMemberReference = false;
7936 
7937     for (; I != CE; ++I) {
7938       // If the current component is member of a struct (parent struct) mark it.
7939       if (!EncounteredME) {
7940         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7941         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7942         // as MEMBER_OF the parent struct.
7943         if (EncounteredME) {
7944           ShouldBeMemberOf = true;
7945           // Do not emit as complex pointer if this is actually not array-like
7946           // expression.
7947           if (FirstPointerInComplexData) {
7948             QualType Ty = std::prev(I)
7949                               ->getAssociatedDeclaration()
7950                               ->getType()
7951                               .getNonReferenceType();
7952             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7953             FirstPointerInComplexData = false;
7954           }
7955         }
7956       }
7957 
7958       auto Next = std::next(I);
7959 
7960       // We need to generate the addresses and sizes if this is the last
7961       // component, if the component is a pointer or if it is an array section
7962       // whose length can't be proved to be one. If this is a pointer, it
7963       // becomes the base address for the following components.
7964 
7965       // A final array section, is one whose length can't be proved to be one.
7966       // If the map item is non-contiguous then we don't treat any array section
7967       // as final array section.
7968       bool IsFinalArraySection =
7969           !IsNonContiguous &&
7970           isFinalArraySectionExpression(I->getAssociatedExpression());
7971 
7972       // If we have a declaration for the mapping use that, otherwise use
7973       // the base declaration of the map clause.
7974       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7975                                      ? I->getAssociatedDeclaration()
7976                                      : BaseDecl;
7977       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7978                                                : MapExpr;
7979 
7980       // Get information on whether the element is a pointer. Have to do a
7981       // special treatment for array sections given that they are built-in
7982       // types.
7983       const auto *OASE =
7984           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7985       const auto *OAShE =
7986           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7987       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7988       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7989       bool IsPointer =
7990           OAShE ||
7991           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7992                        .getCanonicalType()
7993                        ->isAnyPointerType()) ||
7994           I->getAssociatedExpression()->getType()->isAnyPointerType();
7995       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7996                                MapDecl &&
7997                                MapDecl->getType()->isLValueReferenceType();
7998       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
7999 
8000       if (OASE)
8001         ++DimSize;
8002 
8003       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
8004           IsFinalArraySection) {
8005         // If this is not the last component, we expect the pointer to be
8006         // associated with an array expression or member expression.
8007         assert((Next == CE ||
8008                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
8009                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
8010                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
8011                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
8012                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
8013                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
8014                "Unexpected expression");
8015 
8016         Address LB = Address::invalid();
8017         Address LowestElem = Address::invalid();
8018         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
8019                                        const MemberExpr *E) {
8020           const Expr *BaseExpr = E->getBase();
8021           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
8022           // scalar.
8023           LValue BaseLV;
8024           if (E->isArrow()) {
8025             LValueBaseInfo BaseInfo;
8026             TBAAAccessInfo TBAAInfo;
8027             Address Addr =
8028                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
8029             QualType PtrTy = BaseExpr->getType()->getPointeeType();
8030             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
8031           } else {
8032             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
8033           }
8034           return BaseLV;
8035         };
8036         if (OAShE) {
8037           LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
8038                                     CGF.getContext().getTypeAlignInChars(
8039                                         OAShE->getBase()->getType()));
8040         } else if (IsMemberReference) {
8041           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
8042           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8043           LowestElem = CGF.EmitLValueForFieldInitialization(
8044                               BaseLVal, cast<FieldDecl>(MapDecl))
8045                            .getAddress(CGF);
8046           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
8047                    .getAddress(CGF);
8048         } else {
8049           LowestElem = LB =
8050               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
8051                   .getAddress(CGF);
8052         }
8053 
8054         // If this component is a pointer inside the base struct then we don't
8055         // need to create any entry for it - it will be combined with the object
8056         // it is pointing to into a single PTR_AND_OBJ entry.
8057         bool IsMemberPointerOrAddr =
8058             EncounteredME &&
8059             (((IsPointer || ForDeviceAddr) &&
8060               I->getAssociatedExpression() == EncounteredME) ||
8061              (IsPrevMemberReference && !IsPointer) ||
8062              (IsMemberReference && Next != CE &&
8063               !Next->getAssociatedExpression()->getType()->isPointerType()));
8064         if (!OverlappedElements.empty() && Next == CE) {
8065           // Handle base element with the info for overlapped elements.
8066           assert(!PartialStruct.Base.isValid() && "The base element is set.");
8067           assert(!IsPointer &&
8068                  "Unexpected base element with the pointer type.");
8069           // Mark the whole struct as the struct that requires allocation on the
8070           // device.
8071           PartialStruct.LowestElem = {0, LowestElem};
8072           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8073               I->getAssociatedExpression()->getType());
8074           Address HB = CGF.Builder.CreateConstGEP(
8075               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem,
8076                                                               CGF.VoidPtrTy),
8077               TypeSize.getQuantity() - 1);
8078           PartialStruct.HighestElem = {
8079               std::numeric_limits<decltype(
8080                   PartialStruct.HighestElem.first)>::max(),
8081               HB};
8082           PartialStruct.Base = BP;
8083           PartialStruct.LB = LB;
8084           assert(
8085               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8086               "Overlapped elements must be used only once for the variable.");
8087           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8088           // Emit data for non-overlapped data.
8089           OpenMPOffloadMappingFlags Flags =
8090               OMP_MAP_MEMBER_OF |
8091               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8092                              /*AddPtrFlag=*/false,
8093                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8094           llvm::Value *Size = nullptr;
8095           // Do bitcopy of all non-overlapped structure elements.
8096           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8097                    Component : OverlappedElements) {
8098             Address ComponentLB = Address::invalid();
8099             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8100                  Component) {
8101               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8102                 const auto *FD = dyn_cast<FieldDecl>(VD);
8103                 if (FD && FD->getType()->isLValueReferenceType()) {
8104                   const auto *ME =
8105                       cast<MemberExpr>(MC.getAssociatedExpression());
8106                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8107                   ComponentLB =
8108                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
8109                           .getAddress(CGF);
8110                 } else {
8111                   ComponentLB =
8112                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
8113                           .getAddress(CGF);
8114                 }
8115                 Size = CGF.Builder.CreatePtrDiff(
8116                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
8117                     CGF.EmitCastToVoidPtr(LB.getPointer()));
8118                 break;
8119               }
8120             }
8121             assert(Size && "Failed to determine structure size");
8122             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8123             CombinedInfo.BasePointers.push_back(BP.getPointer());
8124             CombinedInfo.Pointers.push_back(LB.getPointer());
8125             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8126                 Size, CGF.Int64Ty, /*isSigned=*/true));
8127             CombinedInfo.Types.push_back(Flags);
8128             CombinedInfo.Mappers.push_back(nullptr);
8129             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8130                                                                       : 1);
8131             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
8132           }
8133           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8134           CombinedInfo.BasePointers.push_back(BP.getPointer());
8135           CombinedInfo.Pointers.push_back(LB.getPointer());
8136           Size = CGF.Builder.CreatePtrDiff(
8137               CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
8138               CGF.EmitCastToVoidPtr(LB.getPointer()));
8139           CombinedInfo.Sizes.push_back(
8140               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8141           CombinedInfo.Types.push_back(Flags);
8142           CombinedInfo.Mappers.push_back(nullptr);
8143           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8144                                                                     : 1);
8145           break;
8146         }
8147         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8148         if (!IsMemberPointerOrAddr ||
8149             (Next == CE && MapType != OMPC_MAP_unknown)) {
8150           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8151           CombinedInfo.BasePointers.push_back(BP.getPointer());
8152           CombinedInfo.Pointers.push_back(LB.getPointer());
8153           CombinedInfo.Sizes.push_back(
8154               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8155           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8156                                                                     : 1);
8157 
8158           // If Mapper is valid, the last component inherits the mapper.
8159           bool HasMapper = Mapper && Next == CE;
8160           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8161 
8162           // We need to add a pointer flag for each map that comes from the
8163           // same expression except for the first one. We also need to signal
8164           // this map is the first one that relates with the current capture
8165           // (there is a set of entries for each capture).
8166           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8167               MapType, MapModifiers, MotionModifiers, IsImplicit,
8168               !IsExpressionFirstInfo || RequiresReference ||
8169                   FirstPointerInComplexData || IsMemberReference,
8170               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8171 
8172           if (!IsExpressionFirstInfo || IsMemberReference) {
8173             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8174             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8175             if (IsPointer || (IsMemberReference && Next != CE))
8176               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
8177                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
8178 
8179             if (ShouldBeMemberOf) {
8180               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8181               // should be later updated with the correct value of MEMBER_OF.
8182               Flags |= OMP_MAP_MEMBER_OF;
8183               // From now on, all subsequent PTR_AND_OBJ entries should not be
8184               // marked as MEMBER_OF.
8185               ShouldBeMemberOf = false;
8186             }
8187           }
8188 
8189           CombinedInfo.Types.push_back(Flags);
8190         }
8191 
8192         // If we have encountered a member expression so far, keep track of the
8193         // mapped member. If the parent is "*this", then the value declaration
8194         // is nullptr.
8195         if (EncounteredME) {
8196           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8197           unsigned FieldIndex = FD->getFieldIndex();
8198 
8199           // Update info about the lowest and highest elements for this struct
8200           if (!PartialStruct.Base.isValid()) {
8201             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8202             if (IsFinalArraySection) {
8203               Address HB =
8204                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
8205                       .getAddress(CGF);
8206               PartialStruct.HighestElem = {FieldIndex, HB};
8207             } else {
8208               PartialStruct.HighestElem = {FieldIndex, LowestElem};
8209             }
8210             PartialStruct.Base = BP;
8211             PartialStruct.LB = BP;
8212           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8213             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8214           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8215             PartialStruct.HighestElem = {FieldIndex, LowestElem};
8216           }
8217         }
8218 
8219         // Need to emit combined struct for array sections.
8220         if (IsFinalArraySection || IsNonContiguous)
8221           PartialStruct.IsArraySection = true;
8222 
8223         // If we have a final array section, we are done with this expression.
8224         if (IsFinalArraySection)
8225           break;
8226 
8227         // The pointer becomes the base for the next element.
8228         if (Next != CE)
8229           BP = IsMemberReference ? LowestElem : LB;
8230 
8231         IsExpressionFirstInfo = false;
8232         IsCaptureFirstInfo = false;
8233         FirstPointerInComplexData = false;
8234         IsPrevMemberReference = IsMemberReference;
8235       } else if (FirstPointerInComplexData) {
8236         QualType Ty = Components.rbegin()
8237                           ->getAssociatedDeclaration()
8238                           ->getType()
8239                           .getNonReferenceType();
8240         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8241         FirstPointerInComplexData = false;
8242       }
8243     }
8244     // If ran into the whole component - allocate the space for the whole
8245     // record.
8246     if (!EncounteredME)
8247       PartialStruct.HasCompleteRecord = true;
8248 
8249     if (!IsNonContiguous)
8250       return;
8251 
8252     const ASTContext &Context = CGF.getContext();
8253 
8254     // For supporting stride in array section, we need to initialize the first
8255     // dimension size as 1, first offset as 0, and first count as 1
8256     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8257     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8258     MapValuesArrayTy CurStrides;
8259     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8260     uint64_t ElementTypeSize;
8261 
8262     // Collect Size information for each dimension and get the element size as
8263     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
8265     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8266          Components) {
8267       const Expr *AssocExpr = Component.getAssociatedExpression();
8268       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8269 
8270       if (!OASE)
8271         continue;
8272 
8273       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8274       auto *CAT = Context.getAsConstantArrayType(Ty);
8275       auto *VAT = Context.getAsVariableArrayType(Ty);
8276 
8277       // We need all the dimension size except for the last dimension.
8278       assert((VAT || CAT || &Component == &*Components.begin()) &&
8279              "Should be either ConstantArray or VariableArray if not the "
8280              "first Component");
8281 
8282       // Get element size if CurStrides is empty.
8283       if (CurStrides.empty()) {
8284         const Type *ElementType = nullptr;
8285         if (CAT)
8286           ElementType = CAT->getElementType().getTypePtr();
8287         else if (VAT)
8288           ElementType = VAT->getElementType().getTypePtr();
8289         else
8290           assert(&Component == &*Components.begin() &&
8291                  "Only expect pointer (non CAT or VAT) when this is the "
8292                  "first Component");
8293         // If ElementType is null, then it means the base is a pointer
8294         // (neither CAT nor VAT) and we'll attempt to get ElementType again
8295         // for next iteration.
8296         if (ElementType) {
8297           // For the case that having pointer as base, we need to remove one
8298           // level of indirection.
8299           if (&Component != &*Components.begin())
8300             ElementType = ElementType->getPointeeOrArrayElementType();
8301           ElementTypeSize =
8302               Context.getTypeSizeInChars(ElementType).getQuantity();
8303           CurStrides.push_back(
8304               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8305         }
8306       }
8307       // Get dimension value except for the last dimension since we don't need
8308       // it.
8309       if (DimSizes.size() < Components.size() - 1) {
8310         if (CAT)
8311           DimSizes.push_back(llvm::ConstantInt::get(
8312               CGF.Int64Ty, CAT->getSize().getZExtValue()));
8313         else if (VAT)
8314           DimSizes.push_back(CGF.Builder.CreateIntCast(
8315               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8316               /*IsSigned=*/false));
8317       }
8318     }
8319 
    // Skip the dummy dimension since we already have its information.
8321     auto DI = DimSizes.begin() + 1;
8322     // Product of dimension.
8323     llvm::Value *DimProd =
8324         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8325 
8326     // Collect info for non-contiguous. Notice that offset, count, and stride
8327     // are only meaningful for array-section, so we insert a null for anything
8328     // other than array-section.
8329     // Also, the size of offset, count, and stride are not the same as
8330     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8331     // count, and stride are the same as the number of non-contiguous
8332     // declaration in target update to/from clause.
8333     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8334          Components) {
8335       const Expr *AssocExpr = Component.getAssociatedExpression();
8336 
8337       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8338         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8339             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8340             /*isSigned=*/false);
8341         CurOffsets.push_back(Offset);
8342         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8343         CurStrides.push_back(CurStrides.back());
8344         continue;
8345       }
8346 
8347       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8348 
8349       if (!OASE)
8350         continue;
8351 
8352       // Offset
8353       const Expr *OffsetExpr = OASE->getLowerBound();
8354       llvm::Value *Offset = nullptr;
8355       if (!OffsetExpr) {
8356         // If offset is absent, then we just set it to zero.
8357         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8358       } else {
8359         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8360                                            CGF.Int64Ty,
8361                                            /*isSigned=*/false);
8362       }
8363       CurOffsets.push_back(Offset);
8364 
8365       // Count
8366       const Expr *CountExpr = OASE->getLength();
8367       llvm::Value *Count = nullptr;
8368       if (!CountExpr) {
8369         // In Clang, once a high dimension is an array section, we construct all
8370         // the lower dimension as array section, however, for case like
8371         // arr[0:2][2], Clang construct the inner dimension as an array section
8372         // but it actually is not in an array section form according to spec.
8373         if (!OASE->getColonLocFirst().isValid() &&
8374             !OASE->getColonLocSecond().isValid()) {
8375           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8376         } else {
8377           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8378           // When the length is absent it defaults to ⌈(size −
8379           // lower-bound)/stride⌉, where size is the size of the array
8380           // dimension.
8381           const Expr *StrideExpr = OASE->getStride();
8382           llvm::Value *Stride =
8383               StrideExpr
8384                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8385                                               CGF.Int64Ty, /*isSigned=*/false)
8386                   : nullptr;
8387           if (Stride)
8388             Count = CGF.Builder.CreateUDiv(
8389                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8390           else
8391             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8392         }
8393       } else {
8394         Count = CGF.EmitScalarExpr(CountExpr);
8395       }
8396       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8397       CurCounts.push_back(Count);
8398 
8399       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8400       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8401       //              Offset      Count     Stride
8402       //    D0          0           1         4    (int)    <- dummy dimension
8403       //    D1          0           2         8    (2 * (1) * 4)
8404       //    D2          1           2         20   (1 * (1 * 5) * 4)
      //    D3          0           2         200  (2 * (1 * 5 * 5) * 4)
8406       const Expr *StrideExpr = OASE->getStride();
8407       llvm::Value *Stride =
8408           StrideExpr
8409               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8410                                           CGF.Int64Ty, /*isSigned=*/false)
8411               : nullptr;
8412       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8413       if (Stride)
8414         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8415       else
8416         CurStrides.push_back(DimProd);
8417       if (DI != DimSizes.end())
8418         ++DI;
8419     }
8420 
8421     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8422     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8423     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8424   }
8425 
8426   /// Return the adjusted map modifiers if the declaration a capture refers to
8427   /// appears in a first-private clause. This is expected to be used only with
8428   /// directives that start with 'target'.
8429   MappableExprsHandler::OpenMPOffloadMappingFlags
8430   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8431     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8432 
8433     // A first private variable captured by reference will use only the
8434     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8435     // declaration is known as first-private in this handler.
8436     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8437       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8438         return MappableExprsHandler::OMP_MAP_TO |
8439                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8440       return MappableExprsHandler::OMP_MAP_PRIVATE |
8441              MappableExprsHandler::OMP_MAP_TO;
8442     }
8443     auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8444     if (I != LambdasMap.end())
8445       // for map(to: lambda): using user specified map type.
8446       return getMapTypeBits(
8447           I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8448           /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
8449           /*AddPtrFlag=*/false,
8450           /*AddIsTargetParamFlag=*/false,
8451           /*isNonContiguous=*/false);
8452     return MappableExprsHandler::OMP_MAP_TO |
8453            MappableExprsHandler::OMP_MAP_FROM;
8454   }
8455 
8456   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8457     // Rotate by getFlagMemberOffset() bits.
8458     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8459                                                   << getFlagMemberOffset());
8460   }
8461 
8462   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8463                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8464     // If the entry is PTR_AND_OBJ but has not been marked with the special
8465     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8466     // marked as MEMBER_OF.
8467     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8468         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8469       return;
8470 
8471     // Reset the placeholder value to prepare the flag for the assignment of the
8472     // proper MEMBER_OF value.
8473     Flags &= ~OMP_MAP_MEMBER_OF;
8474     Flags |= MemberOfFlag;
8475   }
8476 
8477   void getPlainLayout(const CXXRecordDecl *RD,
8478                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8479                       bool AsBase) const {
8480     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8481 
8482     llvm::StructType *St =
8483         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8484 
8485     unsigned NumElements = St->getNumElements();
8486     llvm::SmallVector<
8487         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8488         RecordLayout(NumElements);
8489 
8490     // Fill bases.
8491     for (const auto &I : RD->bases()) {
8492       if (I.isVirtual())
8493         continue;
8494       const auto *Base = I.getType()->getAsCXXRecordDecl();
8495       // Ignore empty bases.
8496       if (Base->isEmpty() || CGF.getContext()
8497                                  .getASTRecordLayout(Base)
8498                                  .getNonVirtualSize()
8499                                  .isZero())
8500         continue;
8501 
8502       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8503       RecordLayout[FieldIndex] = Base;
8504     }
8505     // Fill in virtual bases.
8506     for (const auto &I : RD->vbases()) {
8507       const auto *Base = I.getType()->getAsCXXRecordDecl();
8508       // Ignore empty bases.
8509       if (Base->isEmpty())
8510         continue;
8511       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8512       if (RecordLayout[FieldIndex])
8513         continue;
8514       RecordLayout[FieldIndex] = Base;
8515     }
8516     // Fill in all the fields.
8517     assert(!RD->isUnion() && "Unexpected union.");
8518     for (const auto *Field : RD->fields()) {
8519       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8520       // will fill in later.)
8521       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8522         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8523         RecordLayout[FieldIndex] = Field;
8524       }
8525     }
8526     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8527              &Data : RecordLayout) {
8528       if (Data.isNull())
8529         continue;
8530       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8531         getPlainLayout(Base, Layout, /*AsBase=*/true);
8532       else
8533         Layout.push_back(Data.get<const FieldDecl *>());
8534     }
8535   }
8536 
8537   /// Generate all the base pointers, section pointers, sizes, map types, and
8538   /// mappers for the extracted mappable expressions (all included in \a
8539   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8540   /// pair of the relevant declaration and index where it occurs is appended to
8541   /// the device pointers info array.
8542   void generateAllInfoForClauses(
8543       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8544       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8545           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8546     // We have to process the component lists that relate with the same
8547     // declaration in a single chunk so that we can generate the map flags
8548     // correctly. Therefore, we organize all lists in a map.
8549     enum MapKind { Present, Allocs, Other, Total };
8550     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8551                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8552         Info;
8553 
8554     // Helper function to fill the information map for the different supported
8555     // clauses.
8556     auto &&InfoGen =
8557         [&Info, &SkipVarSet](
8558             const ValueDecl *D, MapKind Kind,
8559             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8560             OpenMPMapClauseKind MapType,
8561             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8562             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8563             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8564             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8565           if (SkipVarSet.contains(D))
8566             return;
8567           auto It = Info.find(D);
8568           if (It == Info.end())
8569             It = Info
8570                      .insert(std::make_pair(
8571                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8572                      .first;
8573           It->second[Kind].emplace_back(
8574               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8575               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8576         };
8577 
8578     for (const auto *Cl : Clauses) {
8579       const auto *C = dyn_cast<OMPMapClause>(Cl);
8580       if (!C)
8581         continue;
8582       MapKind Kind = Other;
8583       if (!C->getMapTypeModifiers().empty() &&
8584           llvm::any_of(C->getMapTypeModifiers(), [](OpenMPMapModifierKind K) {
8585             return K == OMPC_MAP_MODIFIER_present;
8586           }))
8587         Kind = Present;
8588       else if (C->getMapType() == OMPC_MAP_alloc)
8589         Kind = Allocs;
8590       const auto *EI = C->getVarRefs().begin();
8591       for (const auto L : C->component_lists()) {
8592         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8593         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8594                 C->getMapTypeModifiers(), llvm::None,
8595                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8596                 E);
8597         ++EI;
8598       }
8599     }
8600     for (const auto *Cl : Clauses) {
8601       const auto *C = dyn_cast<OMPToClause>(Cl);
8602       if (!C)
8603         continue;
8604       MapKind Kind = Other;
8605       if (!C->getMotionModifiers().empty() &&
8606           llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
8607             return K == OMPC_MOTION_MODIFIER_present;
8608           }))
8609         Kind = Present;
8610       const auto *EI = C->getVarRefs().begin();
8611       for (const auto L : C->component_lists()) {
8612         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8613                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8614                 C->isImplicit(), std::get<2>(L), *EI);
8615         ++EI;
8616       }
8617     }
8618     for (const auto *Cl : Clauses) {
8619       const auto *C = dyn_cast<OMPFromClause>(Cl);
8620       if (!C)
8621         continue;
8622       MapKind Kind = Other;
8623       if (!C->getMotionModifiers().empty() &&
8624           llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
8625             return K == OMPC_MOTION_MODIFIER_present;
8626           }))
8627         Kind = Present;
8628       const auto *EI = C->getVarRefs().begin();
8629       for (const auto L : C->component_lists()) {
8630         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8631                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8632                 C->isImplicit(), std::get<2>(L), *EI);
8633         ++EI;
8634       }
8635     }
8636 
8637     // Look at the use_device_ptr clause information and mark the existing map
8638     // entries as such. If there is no map information for an entry in the
8639     // use_device_ptr list, we create one with map type 'alloc' and zero size
8640     // section. It is the user fault if that was not mapped before. If there is
8641     // no map information and the pointer is a struct member, then we defer the
8642     // emission of that entry until the whole struct has been processed.
8643     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8644                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8645         DeferredInfo;
8646     MapCombinedInfoTy UseDevicePtrCombinedInfo;
8647 
8648     for (const auto *Cl : Clauses) {
8649       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8650       if (!C)
8651         continue;
8652       for (const auto L : C->component_lists()) {
8653         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8654             std::get<1>(L);
8655         assert(!Components.empty() &&
8656                "Not expecting empty list of components!");
8657         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8658         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8659         const Expr *IE = Components.back().getAssociatedExpression();
8660         // If the first component is a member expression, we have to look into
8661         // 'this', which maps to null in the map of map information. Otherwise
8662         // look directly for the information.
8663         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8664 
8665         // We potentially have map information for this declaration already.
8666         // Look for the first set of components that refer to it.
8667         if (It != Info.end()) {
8668           bool Found = false;
8669           for (auto &Data : It->second) {
8670             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8671               return MI.Components.back().getAssociatedDeclaration() == VD;
8672             });
8673             // If we found a map entry, signal that the pointer has to be
8674             // returned and move on to the next declaration. Exclude cases where
8675             // the base pointer is mapped as array subscript, array section or
8676             // array shaping. The base address is passed as a pointer to base in
8677             // this case and cannot be used as a base for use_device_ptr list
8678             // item.
8679             if (CI != Data.end()) {
8680               auto PrevCI = std::next(CI->Components.rbegin());
8681               const auto *VarD = dyn_cast<VarDecl>(VD);
8682               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8683                   isa<MemberExpr>(IE) ||
8684                   !VD->getType().getNonReferenceType()->isPointerType() ||
8685                   PrevCI == CI->Components.rend() ||
8686                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8687                   VarD->hasLocalStorage()) {
8688                 CI->ReturnDevicePointer = true;
8689                 Found = true;
8690                 break;
8691               }
8692             }
8693           }
8694           if (Found)
8695             continue;
8696         }
8697 
8698         // We didn't find any match in our map information - generate a zero
8699         // size array section - if the pointer is a struct member we defer this
8700         // action until the whole struct has been processed.
8701         if (isa<MemberExpr>(IE)) {
8702           // Insert the pointer into Info to be processed by
8703           // generateInfoForComponentList. Because it is a member pointer
8704           // without a pointee, no entry will be generated for it, therefore
8705           // we need to generate one after the whole struct has been processed.
8706           // Nonetheless, generateInfoForComponentList must be called to take
8707           // the pointer into account for the calculation of the range of the
8708           // partial struct.
8709           InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8710                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8711                   nullptr);
8712           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8713         } else {
8714           llvm::Value *Ptr =
8715               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8716           UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8717           UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8718           UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8719           UseDevicePtrCombinedInfo.Sizes.push_back(
8720               llvm::Constant::getNullValue(CGF.Int64Ty));
8721           UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8722           UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8723         }
8724       }
8725     }
8726 
8727     // Look at the use_device_addr clause information and mark the existing map
8728     // entries as such. If there is no map information for an entry in the
8729     // use_device_addr list, we create one with map type 'alloc' and zero size
8730     // section. It is the user fault if that was not mapped before. If there is
8731     // no map information and the pointer is a struct member, then we defer the
8732     // emission of that entry until the whole struct has been processed.
8733     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8734     for (const auto *Cl : Clauses) {
8735       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8736       if (!C)
8737         continue;
8738       for (const auto L : C->component_lists()) {
8739         assert(!std::get<1>(L).empty() &&
8740                "Not expecting empty list of components!");
8741         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8742         if (!Processed.insert(VD).second)
8743           continue;
8744         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8745         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8746         // If the first component is a member expression, we have to look into
8747         // 'this', which maps to null in the map of map information. Otherwise
8748         // look directly for the information.
8749         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8750 
8751         // We potentially have map information for this declaration already.
8752         // Look for the first set of components that refer to it.
8753         if (It != Info.end()) {
8754           bool Found = false;
8755           for (auto &Data : It->second) {
8756             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8757               return MI.Components.back().getAssociatedDeclaration() == VD;
8758             });
8759             // If we found a map entry, signal that the pointer has to be
8760             // returned and move on to the next declaration.
8761             if (CI != Data.end()) {
8762               CI->ReturnDevicePointer = true;
8763               Found = true;
8764               break;
8765             }
8766           }
8767           if (Found)
8768             continue;
8769         }
8770 
8771         // We didn't find any match in our map information - generate a zero
8772         // size array section - if the pointer is a struct member we defer this
8773         // action until the whole struct has been processed.
8774         if (isa<MemberExpr>(IE)) {
8775           // Insert the pointer into Info to be processed by
8776           // generateInfoForComponentList. Because it is a member pointer
8777           // without a pointee, no entry will be generated for it, therefore
8778           // we need to generate one after the whole struct has been processed.
8779           // Nonetheless, generateInfoForComponentList must be called to take
8780           // the pointer into account for the calculation of the range of the
8781           // partial struct.
8782           InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8783                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8784                   nullptr, nullptr, /*ForDeviceAddr=*/true);
8785           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8786         } else {
8787           llvm::Value *Ptr;
8788           if (IE->isGLValue())
8789             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8790           else
8791             Ptr = CGF.EmitScalarExpr(IE);
8792           CombinedInfo.Exprs.push_back(VD);
8793           CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8794           CombinedInfo.Pointers.push_back(Ptr);
8795           CombinedInfo.Sizes.push_back(
8796               llvm::Constant::getNullValue(CGF.Int64Ty));
8797           CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8798           CombinedInfo.Mappers.push_back(nullptr);
8799         }
8800       }
8801     }
8802 
8803     for (const auto &Data : Info) {
8804       StructRangeInfoTy PartialStruct;
8805       // Temporary generated information.
8806       MapCombinedInfoTy CurInfo;
8807       const Decl *D = Data.first;
8808       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8809       for (const auto &M : Data.second) {
8810         for (const MapInfo &L : M) {
8811           assert(!L.Components.empty() &&
8812                  "Not expecting declaration with no component lists.");
8813 
8814           // Remember the current base pointer index.
8815           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8816           CurInfo.NonContigInfo.IsNonContiguous =
8817               L.Components.back().isNonContiguous();
8818           generateInfoForComponentList(
8819               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8820               CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8821               L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8822 
8823           // If this entry relates with a device pointer, set the relevant
8824           // declaration and add the 'return pointer' flag.
8825           if (L.ReturnDevicePointer) {
8826             assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8827                    "Unexpected number of mapped base pointers.");
8828 
8829             const ValueDecl *RelevantVD =
8830                 L.Components.back().getAssociatedDeclaration();
8831             assert(RelevantVD &&
8832                    "No relevant declaration related with device pointer??");
8833 
8834             CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8835                 RelevantVD);
8836             CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8837           }
8838         }
8839       }
8840 
8841       // Append any pending zero-length pointers which are struct members and
8842       // used with use_device_ptr or use_device_addr.
8843       auto CI = DeferredInfo.find(Data.first);
8844       if (CI != DeferredInfo.end()) {
8845         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8846           llvm::Value *BasePtr;
8847           llvm::Value *Ptr;
8848           if (L.ForDeviceAddr) {
8849             if (L.IE->isGLValue())
8850               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8851             else
8852               Ptr = this->CGF.EmitScalarExpr(L.IE);
8853             BasePtr = Ptr;
8854             // Entry is RETURN_PARAM. Also, set the placeholder value
8855             // MEMBER_OF=FFFF so that the entry is later updated with the
8856             // correct value of MEMBER_OF.
8857             CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8858           } else {
8859             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8860             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8861                                              L.IE->getExprLoc());
8862             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8863             // placeholder value MEMBER_OF=FFFF so that the entry is later
8864             // updated with the correct value of MEMBER_OF.
8865             CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8866                                     OMP_MAP_MEMBER_OF);
8867           }
8868           CurInfo.Exprs.push_back(L.VD);
8869           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8870           CurInfo.Pointers.push_back(Ptr);
8871           CurInfo.Sizes.push_back(
8872               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8873           CurInfo.Mappers.push_back(nullptr);
8874         }
8875       }
8876       // If there is an entry in PartialStruct it means we have a struct with
8877       // individual members mapped. Emit an extra combined entry.
8878       if (PartialStruct.Base.isValid()) {
8879         CurInfo.NonContigInfo.Dims.push_back(0);
8880         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8881       }
8882 
8883       // We need to append the results of this capture to what we already
8884       // have.
8885       CombinedInfo.append(CurInfo);
8886     }
8887     // Append data for use_device_ptr clauses.
8888     CombinedInfo.append(UseDevicePtrCombinedInfo);
8889   }
8890 
8891 public:
8892   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8893       : CurDir(&Dir), CGF(CGF) {
8894     // Extract firstprivate clause information.
8895     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8896       for (const auto *D : C->varlists())
8897         FirstPrivateDecls.try_emplace(
8898             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8899     // Extract implicit firstprivates from uses_allocators clauses.
8900     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8901       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8902         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8903         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8904           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8905                                         /*Implicit=*/true);
8906         else if (const auto *VD = dyn_cast<VarDecl>(
8907                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8908                          ->getDecl()))
8909           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8910       }
8911     }
8912     // Extract device pointer clause information.
8913     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8914       for (auto L : C->component_lists())
8915         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8916     // Extract map information.
8917     for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8918       if (C->getMapType() != OMPC_MAP_to)
8919         continue;
8920       for (auto L : C->component_lists()) {
8921         const ValueDecl *VD = std::get<0>(L);
8922         const auto *RD = VD ? VD->getType()
8923                                   .getCanonicalType()
8924                                   .getNonReferenceType()
8925                                   ->getAsCXXRecordDecl()
8926                             : nullptr;
8927         if (RD && RD->isLambda())
8928           LambdasMap.try_emplace(std::get<0>(L), C);
8929       }
8930     }
8931   }
8932 
8933   /// Constructor for the declare mapper directive.
8934   MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8935       : CurDir(&Dir), CGF(CGF) {}
8936 
8937   /// Generate code for the combined entry if we have a partially mapped struct
8938   /// and take care of the mapping flags of the arguments corresponding to
8939   /// individual struct members.
8940   void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8941                          MapFlagsArrayTy &CurTypes,
8942                          const StructRangeInfoTy &PartialStruct,
8943                          const ValueDecl *VD = nullptr,
8944                          bool NotTargetParams = true) const {
8945     if (CurTypes.size() == 1 &&
8946         ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
8947         !PartialStruct.IsArraySection)
8948       return;
8949     Address LBAddr = PartialStruct.LowestElem.second;
8950     Address HBAddr = PartialStruct.HighestElem.second;
8951     if (PartialStruct.HasCompleteRecord) {
8952       LBAddr = PartialStruct.LB;
8953       HBAddr = PartialStruct.LB;
8954     }
8955     CombinedInfo.Exprs.push_back(VD);
8956     // Base is the base of the struct
8957     CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8958     // Pointer is the address of the lowest element
8959     llvm::Value *LB = LBAddr.getPointer();
8960     CombinedInfo.Pointers.push_back(LB);
8961     // There should not be a mapper for a combined entry.
8962     CombinedInfo.Mappers.push_back(nullptr);
8963     // Size is (addr of {highest+1} element) - (addr of lowest element)
8964     llvm::Value *HB = HBAddr.getPointer();
8965     llvm::Value *HAddr =
8966         CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
8967     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8968     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8969     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
8970     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8971                                                   /*isSigned=*/false);
8972     CombinedInfo.Sizes.push_back(Size);
8973     // Map type is always TARGET_PARAM, if generate info for captures.
8974     CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
8975                                                  : OMP_MAP_TARGET_PARAM);
8976     // If any element has the present modifier, then make sure the runtime
8977     // doesn't attempt to allocate the struct.
8978     if (CurTypes.end() !=
8979         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8980           return Type & OMP_MAP_PRESENT;
8981         }))
8982       CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
8983     // Remove TARGET_PARAM flag from the first element
8984     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
8985     // If any element has the ompx_hold modifier, then make sure the runtime
8986     // uses the hold reference count for the struct as a whole so that it won't
8987     // be unmapped by an extra dynamic reference count decrement.  Add it to all
8988     // elements as well so the runtime knows which reference count to check
8989     // when determining whether it's time for device-to-host transfers of
8990     // individual elements.
8991     if (CurTypes.end() !=
8992         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8993           return Type & OMP_MAP_OMPX_HOLD;
8994         })) {
8995       CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
8996       for (auto &M : CurTypes)
8997         M |= OMP_MAP_OMPX_HOLD;
8998     }
8999 
9000     // All other current entries will be MEMBER_OF the combined entry
9001     // (except for PTR_AND_OBJ entries which do not have a placeholder value
9002     // 0xFFFF in the MEMBER_OF field).
9003     OpenMPOffloadMappingFlags MemberOfFlag =
9004         getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
9005     for (auto &M : CurTypes)
9006       setCorrectMemberOfFlag(M, MemberOfFlag);
9007   }
9008 
9009   /// Generate all the base pointers, section pointers, sizes, map types, and
9010   /// mappers for the extracted mappable expressions (all included in \a
9011   /// CombinedInfo). Also, for each item that relates with a device pointer, a
9012   /// pair of the relevant declaration and index where it occurs is appended to
9013   /// the device pointers info array.
9014   void generateAllInfo(
9015       MapCombinedInfoTy &CombinedInfo,
9016       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9017           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9018     assert(CurDir.is<const OMPExecutableDirective *>() &&
9019            "Expect a executable directive");
9020     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9021     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
9022   }
9023 
9024   /// Generate all the base pointers, section pointers, sizes, map types, and
9025   /// mappers for the extracted map clauses of user-defined mapper (all included
9026   /// in \a CombinedInfo).
9027   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
9028     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
9029            "Expect a declare mapper directive");
9030     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
9031     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
9032   }
9033 
9034   /// Emit capture info for lambdas for variables captured by reference.
9035   void generateInfoForLambdaCaptures(
9036       const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9037       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
9038     const auto *RD = VD->getType()
9039                          .getCanonicalType()
9040                          .getNonReferenceType()
9041                          ->getAsCXXRecordDecl();
9042     if (!RD || !RD->isLambda())
9043       return;
9044     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
9045     LValue VDLVal = CGF.MakeAddrLValue(
9046         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
9047     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
9048     FieldDecl *ThisCapture = nullptr;
9049     RD->getCaptureFields(Captures, ThisCapture);
9050     if (ThisCapture) {
9051       LValue ThisLVal =
9052           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
9053       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
9054       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
9055                                  VDLVal.getPointer(CGF));
9056       CombinedInfo.Exprs.push_back(VD);
9057       CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
9058       CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
9059       CombinedInfo.Sizes.push_back(
9060           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
9061                                     CGF.Int64Ty, /*isSigned=*/true));
9062       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9063                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9064       CombinedInfo.Mappers.push_back(nullptr);
9065     }
9066     for (const LambdaCapture &LC : RD->captures()) {
9067       if (!LC.capturesVariable())
9068         continue;
9069       const VarDecl *VD = LC.getCapturedVar();
9070       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
9071         continue;
9072       auto It = Captures.find(VD);
9073       assert(It != Captures.end() && "Found lambda capture without field.");
9074       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
9075       if (LC.getCaptureKind() == LCK_ByRef) {
9076         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
9077         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9078                                    VDLVal.getPointer(CGF));
9079         CombinedInfo.Exprs.push_back(VD);
9080         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9081         CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
9082         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9083             CGF.getTypeSize(
9084                 VD->getType().getCanonicalType().getNonReferenceType()),
9085             CGF.Int64Ty, /*isSigned=*/true));
9086       } else {
9087         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
9088         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9089                                    VDLVal.getPointer(CGF));
9090         CombinedInfo.Exprs.push_back(VD);
9091         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9092         CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
9093         CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
9094       }
9095       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9096                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9097       CombinedInfo.Mappers.push_back(nullptr);
9098     }
9099   }
9100 
9101   /// Set correct indices for lambdas captures.
9102   void adjustMemberOfForLambdaCaptures(
9103       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9104       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9105       MapFlagsArrayTy &Types) const {
9106     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9107       // Set correct member_of idx for all implicit lambda captures.
9108       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9109                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9110         continue;
9111       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9112       assert(BasePtr && "Unable to find base lambda address.");
9113       int TgtIdx = -1;
9114       for (unsigned J = I; J > 0; --J) {
9115         unsigned Idx = J - 1;
9116         if (Pointers[Idx] != BasePtr)
9117           continue;
9118         TgtIdx = Idx;
9119         break;
9120       }
9121       assert(TgtIdx != -1 && "Unable to find parent lambda.");
9122       // All other current entries will be MEMBER_OF the combined entry
9123       // (except for PTR_AND_OBJ entries which do not have a placeholder value
9124       // 0xFFFF in the MEMBER_OF field).
9125       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9126       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9127     }
9128   }
9129 
9130   /// Generate the base pointers, section pointers, sizes, map types, and
9131   /// mappers associated to a given capture (all included in \a CombinedInfo).
9132   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
9133                               llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9134                               StructRangeInfoTy &PartialStruct) const {
9135     assert(!Cap->capturesVariableArrayType() &&
9136            "Not expecting to generate map info for a variable array type!");
9137 
9138     // We need to know when we generating information for the first component
9139     const ValueDecl *VD = Cap->capturesThis()
9140                               ? nullptr
9141                               : Cap->getCapturedVar()->getCanonicalDecl();
9142 
9143     // for map(to: lambda): skip here, processing it in
9144     // generateDefaultMapInfo
9145     if (LambdasMap.count(VD))
9146       return;
9147 
9148     // If this declaration appears in a is_device_ptr clause we just have to
9149     // pass the pointer by value. If it is a reference to a declaration, we just
9150     // pass its value.
9151     if (DevPointersMap.count(VD)) {
9152       CombinedInfo.Exprs.push_back(VD);
9153       CombinedInfo.BasePointers.emplace_back(Arg, VD);
9154       CombinedInfo.Pointers.push_back(Arg);
9155       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9156           CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9157           /*isSigned=*/true));
9158       CombinedInfo.Types.push_back(
9159           (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
9160           OMP_MAP_TARGET_PARAM);
9161       CombinedInfo.Mappers.push_back(nullptr);
9162       return;
9163     }
9164 
9165     using MapData =
9166         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
9167                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
9168                    const ValueDecl *, const Expr *>;
9169     SmallVector<MapData, 4> DeclComponentLists;
9170     assert(CurDir.is<const OMPExecutableDirective *>() &&
9171            "Expect a executable directive");
9172     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9173     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9174       const auto *EI = C->getVarRefs().begin();
9175       for (const auto L : C->decl_component_lists(VD)) {
9176         const ValueDecl *VDecl, *Mapper;
9177         // The Expression is not correct if the mapping is implicit
9178         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9179         OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9180         std::tie(VDecl, Components, Mapper) = L;
9181         assert(VDecl == VD && "We got information for the wrong declaration??");
9182         assert(!Components.empty() &&
9183                "Not expecting declaration with no component lists.");
9184         DeclComponentLists.emplace_back(Components, C->getMapType(),
9185                                         C->getMapTypeModifiers(),
9186                                         C->isImplicit(), Mapper, E);
9187         ++EI;
9188       }
9189     }
9190     llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9191                                              const MapData &RHS) {
9192       ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9193       OpenMPMapClauseKind MapType = std::get<1>(RHS);
9194       bool HasPresent = !MapModifiers.empty() &&
9195                         llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
9196                           return K == clang::OMPC_MAP_MODIFIER_present;
9197                         });
9198       bool HasAllocs = MapType == OMPC_MAP_alloc;
9199       MapModifiers = std::get<2>(RHS);
9200       MapType = std::get<1>(LHS);
9201       bool HasPresentR =
9202           !MapModifiers.empty() &&
9203           llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
9204             return K == clang::OMPC_MAP_MODIFIER_present;
9205           });
9206       bool HasAllocsR = MapType == OMPC_MAP_alloc;
9207       return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9208     });
9209 
9210     // Find overlapping elements (including the offset from the base element).
9211     llvm::SmallDenseMap<
9212         const MapData *,
9213         llvm::SmallVector<
9214             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9215         4>
9216         OverlappedData;
9217     size_t Count = 0;
9218     for (const MapData &L : DeclComponentLists) {
9219       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9220       OpenMPMapClauseKind MapType;
9221       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9222       bool IsImplicit;
9223       const ValueDecl *Mapper;
9224       const Expr *VarRef;
9225       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9226           L;
9227       ++Count;
9228       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
9229         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9230         std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9231                  VarRef) = L1;
9232         auto CI = Components.rbegin();
9233         auto CE = Components.rend();
9234         auto SI = Components1.rbegin();
9235         auto SE = Components1.rend();
9236         for (; CI != CE && SI != SE; ++CI, ++SI) {
9237           if (CI->getAssociatedExpression()->getStmtClass() !=
9238               SI->getAssociatedExpression()->getStmtClass())
9239             break;
9240           // Are we dealing with different variables/fields?
9241           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9242             break;
9243         }
9244         // Found overlapping if, at least for one component, reached the head
9245         // of the components list.
9246         if (CI == CE || SI == SE) {
9247           // Ignore it if it is the same component.
9248           if (CI == CE && SI == SE)
9249             continue;
9250           const auto It = (SI == SE) ? CI : SI;
9251           // If one component is a pointer and another one is a kind of
9252           // dereference of this pointer (array subscript, section, dereference,
9253           // etc.), it is not an overlapping.
9254           // Same, if one component is a base and another component is a
9255           // dereferenced pointer memberexpr with the same base.
9256           if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9257               (std::prev(It)->getAssociatedDeclaration() &&
9258                std::prev(It)
9259                    ->getAssociatedDeclaration()
9260                    ->getType()
9261                    ->isPointerType()) ||
9262               (It->getAssociatedDeclaration() &&
9263                It->getAssociatedDeclaration()->getType()->isPointerType() &&
9264                std::next(It) != CE && std::next(It) != SE))
9265             continue;
9266           const MapData &BaseData = CI == CE ? L : L1;
9267           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9268               SI == SE ? Components : Components1;
9269           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
9270           OverlappedElements.getSecond().push_back(SubData);
9271         }
9272       }
9273     }
9274     // Sort the overlapped elements for each item.
9275     llvm::SmallVector<const FieldDecl *, 4> Layout;
9276     if (!OverlappedData.empty()) {
9277       const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9278       const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9279       while (BaseType != OrigType) {
9280         BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9281         OrigType = BaseType->getPointeeOrArrayElementType();
9282       }
9283 
9284       if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9285         getPlainLayout(CRD, Layout, /*AsBase=*/false);
9286       else {
9287         const auto *RD = BaseType->getAsRecordDecl();
9288         Layout.append(RD->field_begin(), RD->field_end());
9289       }
9290     }
9291     for (auto &Pair : OverlappedData) {
9292       llvm::stable_sort(
9293           Pair.getSecond(),
9294           [&Layout](
9295               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9296               OMPClauseMappableExprCommon::MappableExprComponentListRef
9297                   Second) {
9298             auto CI = First.rbegin();
9299             auto CE = First.rend();
9300             auto SI = Second.rbegin();
9301             auto SE = Second.rend();
9302             for (; CI != CE && SI != SE; ++CI, ++SI) {
9303               if (CI->getAssociatedExpression()->getStmtClass() !=
9304                   SI->getAssociatedExpression()->getStmtClass())
9305                 break;
9306               // Are we dealing with different variables/fields?
9307               if (CI->getAssociatedDeclaration() !=
9308                   SI->getAssociatedDeclaration())
9309                 break;
9310             }
9311 
9312             // Lists contain the same elements.
9313             if (CI == CE && SI == SE)
9314               return false;
9315 
9316             // List with less elements is less than list with more elements.
9317             if (CI == CE || SI == SE)
9318               return CI == CE;
9319 
9320             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9321             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9322             if (FD1->getParent() == FD2->getParent())
9323               return FD1->getFieldIndex() < FD2->getFieldIndex();
9324             const auto *It =
9325                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9326                   return FD == FD1 || FD == FD2;
9327                 });
9328             return *It == FD1;
9329           });
9330     }
9331 
9332     // Associated with a capture, because the mapping flags depend on it.
9333     // Go through all of the elements with the overlapped elements.
9334     bool IsFirstComponentList = true;
9335     for (const auto &Pair : OverlappedData) {
9336       const MapData &L = *Pair.getFirst();
9337       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9338       OpenMPMapClauseKind MapType;
9339       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9340       bool IsImplicit;
9341       const ValueDecl *Mapper;
9342       const Expr *VarRef;
9343       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9344           L;
9345       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9346           OverlappedComponents = Pair.getSecond();
9347       generateInfoForComponentList(
9348           MapType, MapModifiers, llvm::None, Components, CombinedInfo,
9349           PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
9350           /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
9351       IsFirstComponentList = false;
9352     }
9353     // Go through other elements without overlapped elements.
9354     for (const MapData &L : DeclComponentLists) {
9355       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9356       OpenMPMapClauseKind MapType;
9357       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9358       bool IsImplicit;
9359       const ValueDecl *Mapper;
9360       const Expr *VarRef;
9361       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9362           L;
9363       auto It = OverlappedData.find(&L);
9364       if (It == OverlappedData.end())
9365         generateInfoForComponentList(MapType, MapModifiers, llvm::None,
9366                                      Components, CombinedInfo, PartialStruct,
9367                                      IsFirstComponentList, IsImplicit, Mapper,
9368                                      /*ForDeviceAddr=*/false, VD, VarRef);
9369       IsFirstComponentList = false;
9370     }
9371   }
9372 
9373   /// Generate the default map information for a given capture \a CI,
9374   /// record field declaration \a RI and captured value \a CV.
9375   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
9376                               const FieldDecl &RI, llvm::Value *CV,
9377                               MapCombinedInfoTy &CombinedInfo) const {
9378     bool IsImplicit = true;
9379     // Do the default mapping.
9380     if (CI.capturesThis()) {
9381       CombinedInfo.Exprs.push_back(nullptr);
9382       CombinedInfo.BasePointers.push_back(CV);
9383       CombinedInfo.Pointers.push_back(CV);
9384       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
9385       CombinedInfo.Sizes.push_back(
9386           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
9387                                     CGF.Int64Ty, /*isSigned=*/true));
9388       // Default map type.
9389       CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
9390     } else if (CI.capturesVariableByCopy()) {
9391       const VarDecl *VD = CI.getCapturedVar();
9392       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9393       CombinedInfo.BasePointers.push_back(CV);
9394       CombinedInfo.Pointers.push_back(CV);
9395       if (!RI.getType()->isAnyPointerType()) {
9396         // We have to signal to the runtime captures passed by value that are
9397         // not pointers.
9398         CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
9399         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9400             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
9401       } else {
9402         // Pointers are implicitly mapped with a zero size and no flags
9403         // (other than first map that is added for all implicit maps).
9404         CombinedInfo.Types.push_back(OMP_MAP_NONE);
9405         CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9406       }
9407       auto I = FirstPrivateDecls.find(VD);
9408       if (I != FirstPrivateDecls.end())
9409         IsImplicit = I->getSecond();
9410     } else {
9411       assert(CI.capturesVariable() && "Expected captured reference.");
9412       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9413       QualType ElementType = PtrTy->getPointeeType();
9414       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9415           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9416       // The default map type for a scalar/complex type is 'to' because by
9417       // default the value doesn't have to be retrieved. For an aggregate
9418       // type, the default is 'tofrom'.
9419       CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
9420       const VarDecl *VD = CI.getCapturedVar();
9421       auto I = FirstPrivateDecls.find(VD);
9422       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9423       CombinedInfo.BasePointers.push_back(CV);
9424       if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9425         Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9426             CV, ElementType, CGF.getContext().getDeclAlign(VD),
9427             AlignmentSource::Decl));
9428         CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
9429       } else {
9430         CombinedInfo.Pointers.push_back(CV);
9431       }
9432       if (I != FirstPrivateDecls.end())
9433         IsImplicit = I->getSecond();
9434     }
9435     // Every default map produces a single argument which is a target parameter.
9436     CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
9437 
9438     // Add flag stating this is an implicit map.
9439     if (IsImplicit)
9440       CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
9441 
9442     // No user-defined mapper for default mapping.
9443     CombinedInfo.Mappers.push_back(nullptr);
9444   }
9445 };
9446 } // anonymous namespace
9447 
9448 static void emitNonContiguousDescriptor(
9449     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9450     CGOpenMPRuntime::TargetDataInfo &Info) {
9451   CodeGenModule &CGM = CGF.CGM;
9452   MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
9453       &NonContigInfo = CombinedInfo.NonContigInfo;
9454 
9455   // Build an array of struct descriptor_dim and then assign it to
9456   // offload_args.
9457   //
9458   // struct descriptor_dim {
9459   //  uint64_t offset;
9460   //  uint64_t count;
9461   //  uint64_t stride
9462   // };
9463   ASTContext &C = CGF.getContext();
9464   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9465   RecordDecl *RD;
9466   RD = C.buildImplicitRecord("descriptor_dim");
9467   RD->startDefinition();
9468   addFieldToRecordDecl(C, RD, Int64Ty);
9469   addFieldToRecordDecl(C, RD, Int64Ty);
9470   addFieldToRecordDecl(C, RD, Int64Ty);
9471   RD->completeDefinition();
9472   QualType DimTy = C.getRecordType(RD);
9473 
9474   enum { OffsetFD = 0, CountFD, StrideFD };
9475   // We need two index variable here since the size of "Dims" is the same as the
9476   // size of Components, however, the size of offset, count, and stride is equal
9477   // to the size of base declaration that is non-contiguous.
9478   for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9479     // Skip emitting ir if dimension size is 1 since it cannot be
9480     // non-contiguous.
9481     if (NonContigInfo.Dims[I] == 1)
9482       continue;
9483     llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
9484     QualType ArrayTy =
9485         C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
9486     Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
9487     for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
9488       unsigned RevIdx = EE - II - 1;
9489       LValue DimsLVal = CGF.MakeAddrLValue(
9490           CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
9491       // Offset
9492       LValue OffsetLVal = CGF.EmitLValueForField(
9493           DimsLVal, *std::next(RD->field_begin(), OffsetFD));
9494       CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
9495       // Count
9496       LValue CountLVal = CGF.EmitLValueForField(
9497           DimsLVal, *std::next(RD->field_begin(), CountFD));
9498       CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
9499       // Stride
9500       LValue StrideLVal = CGF.EmitLValueForField(
9501           DimsLVal, *std::next(RD->field_begin(), StrideFD));
9502       CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
9503     }
9504     // args[I] = &dims
9505     Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9506         DimsAddr, CGM.Int8PtrTy);
9507     llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9508         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9509         Info.PointersArray, 0, I);
9510     Address PAddr(P, CGF.getPointerAlign());
9511     CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
9512     ++L;
9513   }
9514 }
9515 
9516 // Try to extract the base declaration from a `this->x` expression if possible.
9517 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9518   if (!E)
9519     return nullptr;
9520 
9521   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
9522     if (const MemberExpr *ME =
9523             dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9524       return ME->getMemberDecl();
9525   return nullptr;
9526 }
9527 
9528 /// Emit a string constant containing the names of the values mapped to the
9529 /// offloading runtime library.
9530 llvm::Constant *
9531 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9532                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9533 
9534   if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9535     return OMPBuilder.getOrCreateDefaultSrcLocStr();
9536 
9537   SourceLocation Loc;
9538   if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9539     if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9540       Loc = VD->getLocation();
9541     else
9542       Loc = MapExprs.getMapExpr()->getExprLoc();
9543   } else {
9544     Loc = MapExprs.getMapDecl()->getLocation();
9545   }
9546 
9547   std::string ExprName = "";
9548   if (MapExprs.getMapExpr()) {
9549     PrintingPolicy P(CGF.getContext().getLangOpts());
9550     llvm::raw_string_ostream OS(ExprName);
9551     MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9552     OS.flush();
9553   } else {
9554     ExprName = MapExprs.getMapDecl()->getNameAsString();
9555   }
9556 
9557   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9558   return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName.c_str(),
9559                                          PLoc.getLine(), PLoc.getColumn());
9560 }
9561 
9562 /// Emit the arrays used to pass the captures and map information to the
9563 /// offloading runtime library. If there is no map or capture information,
9564 /// return nullptr by reference.
9565 static void emitOffloadingArrays(
9566     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9567     CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
9568     bool IsNonContiguous = false) {
9569   CodeGenModule &CGM = CGF.CGM;
9570   ASTContext &Ctx = CGF.getContext();
9571 
9572   // Reset the array information.
9573   Info.clearArrayInfo();
9574   Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
9575 
9576   if (Info.NumberOfPtrs) {
9577     // Detect if we have any capture size requiring runtime evaluation of the
9578     // size so that a constant array could be eventually used.
9579     bool hasRuntimeEvaluationCaptureSize = false;
9580     for (llvm::Value *S : CombinedInfo.Sizes)
9581       if (!isa<llvm::Constant>(S)) {
9582         hasRuntimeEvaluationCaptureSize = true;
9583         break;
9584       }
9585 
9586     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
9587     QualType PointerArrayType = Ctx.getConstantArrayType(
9588         Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
9589         /*IndexTypeQuals=*/0);
9590 
9591     Info.BasePointersArray =
9592         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
9593     Info.PointersArray =
9594         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
9595     Address MappersArray =
9596         CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
9597     Info.MappersArray = MappersArray.getPointer();
9598 
9599     // If we don't have any VLA types or other types that require runtime
9600     // evaluation, we can use a constant array for the map sizes, otherwise we
9601     // need to fill up the arrays as we do for the pointers.
9602     QualType Int64Ty =
9603         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
9604     if (hasRuntimeEvaluationCaptureSize) {
9605       QualType SizeArrayType = Ctx.getConstantArrayType(
9606           Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
9607           /*IndexTypeQuals=*/0);
9608       Info.SizesArray =
9609           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
9610     } else {
9611       // We expect all the sizes to be constant, so we collect them to create
9612       // a constant array.
9613       SmallVector<llvm::Constant *, 16> ConstSizes;
9614       for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
9615         if (IsNonContiguous &&
9616             (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
9617           ConstSizes.push_back(llvm::ConstantInt::get(
9618               CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
9619         } else {
9620           ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
9621         }
9622       }
9623 
9624       auto *SizesArrayInit = llvm::ConstantArray::get(
9625           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
9626       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
9627       auto *SizesArrayGbl = new llvm::GlobalVariable(
9628           CGM.getModule(), SizesArrayInit->getType(),
9629           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9630           SizesArrayInit, Name);
9631       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
9632       Info.SizesArray = SizesArrayGbl;
9633     }
9634 
9635     // The map types are always constant so we don't need to generate code to
9636     // fill arrays. Instead, we create an array constant.
9637     SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
9638     llvm::copy(CombinedInfo.Types, Mapping.begin());
9639     std::string MaptypesName =
9640         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
9641     auto *MapTypesArrayGbl =
9642         OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
9643     Info.MapTypesArray = MapTypesArrayGbl;
9644 
9645     // The information types are only built if there is debug information
9646     // requested.
9647     if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
9648       Info.MapNamesArray = llvm::Constant::getNullValue(
9649           llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
9650     } else {
9651       auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9652         return emitMappingInformation(CGF, OMPBuilder, MapExpr);
9653       };
9654       SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
9655       llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
9656       std::string MapnamesName =
9657           CGM.getOpenMPRuntime().getName({"offload_mapnames"});
9658       auto *MapNamesArrayGbl =
9659           OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
9660       Info.MapNamesArray = MapNamesArrayGbl;
9661     }
9662 
9663     // If there's a present map type modifier, it must not be applied to the end
9664     // of a region, so generate a separate map type array in that case.
9665     if (Info.separateBeginEndCalls()) {
9666       bool EndMapTypesDiffer = false;
9667       for (uint64_t &Type : Mapping) {
9668         if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
9669           Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
9670           EndMapTypesDiffer = true;
9671         }
9672       }
9673       if (EndMapTypesDiffer) {
9674         MapTypesArrayGbl =
9675             OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
9676         Info.MapTypesArrayEnd = MapTypesArrayGbl;
9677       }
9678     }
9679 
9680     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
9681       llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
9682       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
9683           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9684           Info.BasePointersArray, 0, I);
9685       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9686           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
9687       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9688       CGF.Builder.CreateStore(BPVal, BPAddr);
9689 
9690       if (Info.requiresDevicePointerInfo())
9691         if (const ValueDecl *DevVD =
9692                 CombinedInfo.BasePointers[I].getDevicePtrDecl())
9693           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
9694 
9695       llvm::Value *PVal = CombinedInfo.Pointers[I];
9696       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9697           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9698           Info.PointersArray, 0, I);
9699       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9700           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
9701       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9702       CGF.Builder.CreateStore(PVal, PAddr);
9703 
9704       if (hasRuntimeEvaluationCaptureSize) {
9705         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
9706             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9707             Info.SizesArray,
9708             /*Idx0=*/0,
9709             /*Idx1=*/I);
9710         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
9711         CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
9712                                                           CGM.Int64Ty,
9713                                                           /*isSigned=*/true),
9714                                 SAddr);
9715       }
9716 
9717       // Fill up the mapper array.
9718       llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
9719       if (CombinedInfo.Mappers[I]) {
9720         MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
9721             cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9722         MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
9723         Info.HasMapper = true;
9724       }
9725       Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
9726       CGF.Builder.CreateStore(MFunc, MAddr);
9727     }
9728   }
9729 
9730   if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
9731       Info.NumberOfPtrs == 0)
9732     return;
9733 
9734   emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
9735 }
9736 
namespace {
/// Optional settings for the emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  /// True when the arguments are emitted for the call that ends a region.
  bool ForEndCall;

  /// Doubles as the default constructor (ForEndCall is false) and as the
  /// implicit conversion from bool, so it is intentionally not explicit.
  ArgumentsOptions(bool ForEndCall = false) : ForEndCall(ForEndCall) {}
};
} // namespace
9745 
9746 /// Emit the arguments to be passed to the runtime library based on the
9747 /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
9748 /// ForEndCall, emit map types to be passed for the end of the region instead of
9749 /// the beginning.
9750 static void emitOffloadingArraysArgument(
9751     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
9752     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
9753     llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
9754     llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
9755     const ArgumentsOptions &Options = ArgumentsOptions()) {
9756   assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
9757          "expected region end call to runtime only when end call is separate");
9758   CodeGenModule &CGM = CGF.CGM;
9759   if (Info.NumberOfPtrs) {
9760     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9761         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9762         Info.BasePointersArray,
9763         /*Idx0=*/0, /*Idx1=*/0);
9764     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9765         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9766         Info.PointersArray,
9767         /*Idx0=*/0,
9768         /*Idx1=*/0);
9769     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9770         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9771         /*Idx0=*/0, /*Idx1=*/0);
9772     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9773         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9774         Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
9775                                                     : Info.MapTypesArray,
9776         /*Idx0=*/0,
9777         /*Idx1=*/0);
9778 
9779     // Only emit the mapper information arrays if debug information is
9780     // requested.
9781     if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9782       MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9783     else
9784       MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9785           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9786           Info.MapNamesArray,
9787           /*Idx0=*/0,
9788           /*Idx1=*/0);
9789     // If there is no user-defined mapper, set the mapper array to nullptr to
9790     // avoid an unnecessary data privatization
9791     if (!Info.HasMapper)
9792       MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9793     else
9794       MappersArrayArg =
9795           CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
9796   } else {
9797     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9798     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9799     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9800     MapTypesArrayArg =
9801         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9802     MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9803     MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9804   }
9805 }
9806 
9807 /// Check for inner distribute directive.
9808 static const OMPExecutableDirective *
9809 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9810   const auto *CS = D.getInnermostCapturedStmt();
9811   const auto *Body =
9812       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9813   const Stmt *ChildStmt =
9814       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9815 
9816   if (const auto *NestedDir =
9817           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9818     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9819     switch (D.getDirectiveKind()) {
9820     case OMPD_target:
9821       if (isOpenMPDistributeDirective(DKind))
9822         return NestedDir;
9823       if (DKind == OMPD_teams) {
9824         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9825             /*IgnoreCaptured=*/true);
9826         if (!Body)
9827           return nullptr;
9828         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9829         if (const auto *NND =
9830                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9831           DKind = NND->getDirectiveKind();
9832           if (isOpenMPDistributeDirective(DKind))
9833             return NND;
9834         }
9835       }
9836       return nullptr;
9837     case OMPD_target_teams:
9838       if (isOpenMPDistributeDirective(DKind))
9839         return NestedDir;
9840       return nullptr;
9841     case OMPD_target_parallel:
9842     case OMPD_target_simd:
9843     case OMPD_target_parallel_for:
9844     case OMPD_target_parallel_for_simd:
9845       return nullptr;
9846     case OMPD_target_teams_distribute:
9847     case OMPD_target_teams_distribute_simd:
9848     case OMPD_target_teams_distribute_parallel_for:
9849     case OMPD_target_teams_distribute_parallel_for_simd:
9850     case OMPD_parallel:
9851     case OMPD_for:
9852     case OMPD_parallel_for:
9853     case OMPD_parallel_master:
9854     case OMPD_parallel_sections:
9855     case OMPD_for_simd:
9856     case OMPD_parallel_for_simd:
9857     case OMPD_cancel:
9858     case OMPD_cancellation_point:
9859     case OMPD_ordered:
9860     case OMPD_threadprivate:
9861     case OMPD_allocate:
9862     case OMPD_task:
9863     case OMPD_simd:
9864     case OMPD_tile:
9865     case OMPD_unroll:
9866     case OMPD_sections:
9867     case OMPD_section:
9868     case OMPD_single:
9869     case OMPD_master:
9870     case OMPD_critical:
9871     case OMPD_taskyield:
9872     case OMPD_barrier:
9873     case OMPD_taskwait:
9874     case OMPD_taskgroup:
9875     case OMPD_atomic:
9876     case OMPD_flush:
9877     case OMPD_depobj:
9878     case OMPD_scan:
9879     case OMPD_teams:
9880     case OMPD_target_data:
9881     case OMPD_target_exit_data:
9882     case OMPD_target_enter_data:
9883     case OMPD_distribute:
9884     case OMPD_distribute_simd:
9885     case OMPD_distribute_parallel_for:
9886     case OMPD_distribute_parallel_for_simd:
9887     case OMPD_teams_distribute:
9888     case OMPD_teams_distribute_simd:
9889     case OMPD_teams_distribute_parallel_for:
9890     case OMPD_teams_distribute_parallel_for_simd:
9891     case OMPD_target_update:
9892     case OMPD_declare_simd:
9893     case OMPD_declare_variant:
9894     case OMPD_begin_declare_variant:
9895     case OMPD_end_declare_variant:
9896     case OMPD_declare_target:
9897     case OMPD_end_declare_target:
9898     case OMPD_declare_reduction:
9899     case OMPD_declare_mapper:
9900     case OMPD_taskloop:
9901     case OMPD_taskloop_simd:
9902     case OMPD_master_taskloop:
9903     case OMPD_master_taskloop_simd:
9904     case OMPD_parallel_master_taskloop:
9905     case OMPD_parallel_master_taskloop_simd:
9906     case OMPD_requires:
9907     case OMPD_metadirective:
9908     case OMPD_unknown:
9909     default:
9910       llvm_unreachable("Unexpected directive.");
9911     }
9912   }
9913 
9914   return nullptr;
9915 }
9916 
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
///
/// In the pseudo-code above \c size stands for the number of elements. The
/// emitted function divides its incoming size argument by the element size
/// before using it as an element count.
///
/// Nothing is emitted when \c UDMMap already holds an entry for \p D. On
/// completion the new function is recorded in \c UDMMap.
///
/// \param D   The 'declare mapper' declaration to generate a function for.
/// \param CGF If non-null, \p D is also appended to the \c FunctionUDMMap
///            entry keyed by \c CGF->CurFn.
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // A mapper function is emitted at most once per declaration.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  // The variable referenced by the mapper declaration. Inside the element loop
  // below it is privatized to refer to the current array element.
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes. The six parameters are,
  // in order: handle, base, begin, size, type, name.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The function name is built from "omp_mapper", the mangled name of the
  // mapped type, and the mapper identifier.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Drop 'optnone' from the mapper function if the call above attached it.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation. A dedicated CodeGenFunction is
  // used; \p CGF is not used for emission.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  // NOTE(review): every argument load below passes a pointer-to-T QualType
  // (e.g. pointer to Int64Ty for the int64 size) rather than T itself --
  // confirm this is intended.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements. From here on
  // Size is an element count.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // [PtrBegin, PtrEnd) is the range of elements to map, typed as pointers to
  // the mapped type.
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(
      PtrBegin->getType()->getPointerElementType(), PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required. When the guard fails, control goes
  // straight to HeadBB.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.
  // The loop is bottom-tested: the header only checks for an empty range, and
  // the exit condition is re-evaluated at the end of the body.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied. An empty range skips
  // both the loop and the deletion step and jumps to DoneBB.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // The block that holds the emptiness check; it is the first predecessor of
  // the loop body for the PHI below.
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  // LastBB tracks the block from which the loop back-edge is taken; it is
  // advanced each time a component appends new blocks.
  llvm::BasicBlock *LastBB = BodyBB;
  // Pointer to the element handled by the current iteration. Its two incoming
  // values are PtrBegin (from EntryBB) and PtrNext (from LastBB, added below).
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift that count left by getFlagMemberOffset() so that it can be added to
  // each component's map type below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // The name argument is a null pointer when no debug info is requested;
    // otherwise it is produced by emitMappingInformation.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type. The shifted count of
    // pre-existing components is added to the component's static map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    //
    // LeftToFrom keeps only the TO/FROM bits of the incoming map type; the
    // branches below select one of four results that are merged by a PHI in
    // EndBB.
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    // If neither "to only" nor "from only" matched, both bits are set
    // (tofrom) and control reaches EndBB directly from ToElseBB.
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    // FromBB gets no explicit branch here; presumably EmitBlock supplies the
    // fall-through into EndBB, which the PHI below relies on.
    MapperCGF.EmitBlock(EndBB);
    // The loop back-edge must now originate from this component's final block.
    LastBB = EndBB;
    // Merge the four possible map types; the ToElseBB edge carries the
    // unmodified MemberMapType (tofrom case).
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    // Note: this six-element array shadows the one-element OffloadingArgs
    // declared before the loop.
    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  // Second incoming value of the element pointer: the back-edge from the last
  // block emitted for the body.
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required. When the guard fails, control goes to DoneBB.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Record the finished function so later requests for this mapper reuse it.
  UDMMap.try_emplace(D, Fn);
  // Also note which function triggered the emission, when one was given.
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
10197 
10198 /// Emit the array initialization or deletion portion for user-defined mapper
10199 /// code generation. First, it evaluates whether an array section is mapped and
10200 /// whether the \a MapType instructs to delete this section. If \a IsInit is
10201 /// true, and \a MapType indicates to not delete this array, array
10202 /// initialization code is generated. If \a IsInit is false, and \a MapType
10203 /// indicates to not this array, array deletion code is generated.
10204 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
10205     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
10206     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
10207     llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
10208     bool IsInit) {
10209   StringRef Prefix = IsInit ? ".init" : ".del";
10210 
10211   // Evaluate if this is an array section.
10212   llvm::BasicBlock *BodyBB =
10213       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
10214   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
10215       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
10216   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
10217       MapType,
10218       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
10219   llvm::Value *DeleteCond;
10220   llvm::Value *Cond;
10221   if (IsInit) {
10222     // base != begin?
10223     llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
10224         MapperCGF.Builder.CreatePtrDiff(Base, Begin));
10225     // IsPtrAndObj?
10226     llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
10227         MapType,
10228         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
10229     PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
10230     BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
10231     Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
10232     DeleteCond = MapperCGF.Builder.CreateIsNull(
10233         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
10234   } else {
10235     Cond = IsArray;
10236     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
10237         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
10238   }
10239   Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
10240   MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
10241 
10242   MapperCGF.EmitBlock(BodyBB);
10243   // Get the array size by multiplying element size and element number (i.e., \p
10244   // Size).
10245   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
10246       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
10247   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
10248   // memory allocation/deletion purpose only.
10249   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
10250       MapType,
10251       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
10252                                    MappableExprsHandler::OMP_MAP_FROM)));
10253   MapTypeArg = MapperCGF.Builder.CreateOr(
10254       MapTypeArg,
10255       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));
10256 
10257   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
10258   // data structure.
10259   llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
10260                                    ArraySize, MapTypeArg, MapName};
10261   MapperCGF.EmitRuntimeCall(
10262       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
10263                                             OMPRTL___tgt_push_mapper_component),
10264       OffloadingArgs);
10265 }
10266 
10267 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10268     const OMPDeclareMapperDecl *D) {
10269   auto I = UDMMap.find(D);
10270   if (I != UDMMap.end())
10271     return I->second;
10272   emitUserDefinedMapper(D);
10273   return UDMMap.lookup(D);
10274 }
10275 
10276 void CGOpenMPRuntime::emitTargetNumIterationsCall(
10277     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10278     llvm::Value *DeviceID,
10279     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10280                                      const OMPLoopDirective &D)>
10281         SizeEmitter) {
10282   OpenMPDirectiveKind Kind = D.getDirectiveKind();
10283   const OMPExecutableDirective *TD = &D;
10284   // Get nested teams distribute kind directive, if any.
10285   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
10286     TD = getNestedDistributeDirective(CGM.getContext(), D);
10287   if (!TD)
10288     return;
10289   const auto *LD = cast<OMPLoopDirective>(TD);
10290   auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
10291                                                          PrePostActionTy &) {
10292     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
10293       llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10294       llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
10295       CGF.EmitRuntimeCall(
10296           OMPBuilder.getOrCreateRuntimeFunction(
10297               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
10298           Args);
10299     }
10300   };
10301   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
10302 }
10303 
10304 void CGOpenMPRuntime::emitTargetCall(
10305     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10306     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
10307     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10308     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10309                                      const OMPLoopDirective &D)>
10310         SizeEmitter) {
10311   if (!CGF.HaveInsertPoint())
10312     return;
10313 
10314   assert(OutlinedFn && "Invalid outlined function!");
10315 
10316   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10317                                  D.hasClausesOfKind<OMPNowaitClause>();
10318   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
10319   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
10320   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
10321                                             PrePostActionTy &) {
10322     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10323   };
10324   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
10325 
10326   CodeGenFunction::OMPTargetDataInfo InputInfo;
10327   llvm::Value *MapTypesArray = nullptr;
10328   llvm::Value *MapNamesArray = nullptr;
10329   // Fill up the pointer arrays and transfer execution to the device.
10330   auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
10331                     &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
10332                     &CapturedVars,
10333                     SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
10334     if (Device.getInt() == OMPC_DEVICE_ancestor) {
10335       // Reverse offloading is not supported, so just execute on the host.
10336       if (RequiresOuterTask) {
10337         CapturedVars.clear();
10338         CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10339       }
10340       emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10341       return;
10342     }
10343 
10344     // On top of the arrays that were filled up, the target offloading call
10345     // takes as arguments the device id as well as the host pointer. The host
10346     // pointer is used by the runtime library to identify the current target
10347     // region, so it only has to be unique and not necessarily point to
10348     // anything. It could be the pointer to the outlined function that
10349     // implements the target region, but we aren't using that so that the
10350     // compiler doesn't need to keep that, and could therefore inline the host
10351     // function if proven worthwhile during optimization.
10352 
10353     // From this point on, we need to have an ID of the target region defined.
10354     assert(OutlinedFnID && "Invalid outlined function ID!");
10355 
10356     // Emit device ID if any.
10357     llvm::Value *DeviceID;
10358     if (Device.getPointer()) {
10359       assert((Device.getInt() == OMPC_DEVICE_unknown ||
10360               Device.getInt() == OMPC_DEVICE_device_num) &&
10361              "Expected device_num modifier.");
10362       llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10363       DeviceID =
10364           CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10365     } else {
10366       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10367     }
10368 
10369     // Emit the number of elements in the offloading arrays.
10370     llvm::Value *PointerNum =
10371         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10372 
10373     // Return value of the runtime offloading call.
10374     llvm::Value *Return;
10375 
10376     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
10377     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
10378 
10379     // Source location for the ident struct
10380     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10381 
10382     // Emit tripcount for the target loop-based directive.
10383     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
10384 
10385     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10386     // The target region is an outlined function launched by the runtime
10387     // via calls __tgt_target() or __tgt_target_teams().
10388     //
10389     // __tgt_target() launches a target region with one team and one thread,
10390     // executing a serial region.  This master thread may in turn launch
10391     // more threads within its team upon encountering a parallel region,
10392     // however, no additional teams can be launched on the device.
10393     //
10394     // __tgt_target_teams() launches a target region with one or more teams,
10395     // each with one or more threads.  This call is required for target
10396     // constructs such as:
10397     //  'target teams'
10398     //  'target' / 'teams'
10399     //  'target teams distribute parallel for'
10400     //  'target parallel'
10401     // and so on.
10402     //
10403     // Note that on the host and CPU targets, the runtime implementation of
10404     // these calls simply call the outlined function without forking threads.
10405     // The outlined functions themselves have runtime calls to
10406     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
10407     // the compiler in emitTeamsCall() and emitParallelCall().
10408     //
10409     // In contrast, on the NVPTX target, the implementation of
10410     // __tgt_target_teams() launches a GPU kernel with the requested number
10411     // of teams and threads so no additional calls to the runtime are required.
10412     if (NumTeams) {
10413       // If we have NumTeams defined this means that we have an enclosed teams
10414       // region. Therefore we also expect to have NumThreads defined. These two
10415       // values should be defined in the presence of a teams directive,
10416       // regardless of having any clauses associated. If the user is using teams
10417       // but no clauses, these two values will be the default that should be
10418       // passed to the runtime library - a 32-bit integer with the value zero.
10419       assert(NumThreads && "Thread limit expression should be available along "
10420                            "with number of teams.");
10421       SmallVector<llvm::Value *> OffloadingArgs = {
10422           RTLoc,
10423           DeviceID,
10424           OutlinedFnID,
10425           PointerNum,
10426           InputInfo.BasePointersArray.getPointer(),
10427           InputInfo.PointersArray.getPointer(),
10428           InputInfo.SizesArray.getPointer(),
10429           MapTypesArray,
10430           MapNamesArray,
10431           InputInfo.MappersArray.getPointer(),
10432           NumTeams,
10433           NumThreads};
10434       if (HasNowait) {
10435         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10436         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10437         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10438         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10439         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10440         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10441       }
10442       Return = CGF.EmitRuntimeCall(
10443           OMPBuilder.getOrCreateRuntimeFunction(
10444               CGM.getModule(), HasNowait
10445                                    ? OMPRTL___tgt_target_teams_nowait_mapper
10446                                    : OMPRTL___tgt_target_teams_mapper),
10447           OffloadingArgs);
10448     } else {
10449       SmallVector<llvm::Value *> OffloadingArgs = {
10450           RTLoc,
10451           DeviceID,
10452           OutlinedFnID,
10453           PointerNum,
10454           InputInfo.BasePointersArray.getPointer(),
10455           InputInfo.PointersArray.getPointer(),
10456           InputInfo.SizesArray.getPointer(),
10457           MapTypesArray,
10458           MapNamesArray,
10459           InputInfo.MappersArray.getPointer()};
10460       if (HasNowait) {
10461         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10462         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10463         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10464         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10465         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10466         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10467       }
10468       Return = CGF.EmitRuntimeCall(
10469           OMPBuilder.getOrCreateRuntimeFunction(
10470               CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
10471                                          : OMPRTL___tgt_target_mapper),
10472           OffloadingArgs);
10473     }
10474 
10475     // Check the error code and execute the host version if required.
10476     llvm::BasicBlock *OffloadFailedBlock =
10477         CGF.createBasicBlock("omp_offload.failed");
10478     llvm::BasicBlock *OffloadContBlock =
10479         CGF.createBasicBlock("omp_offload.cont");
10480     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
10481     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
10482 
10483     CGF.EmitBlock(OffloadFailedBlock);
10484     if (RequiresOuterTask) {
10485       CapturedVars.clear();
10486       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10487     }
10488     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10489     CGF.EmitBranch(OffloadContBlock);
10490 
10491     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
10492   };
10493 
10494   // Notify that the host version must be executed.
10495   auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
10496                     RequiresOuterTask](CodeGenFunction &CGF,
10497                                        PrePostActionTy &) {
10498     if (RequiresOuterTask) {
10499       CapturedVars.clear();
10500       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10501     }
10502     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10503   };
10504 
10505   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10506                           &MapNamesArray, &CapturedVars, RequiresOuterTask,
10507                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
10508     // Fill up the arrays with all the captured variables.
10509     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10510 
10511     // Get mappable expression information.
10512     MappableExprsHandler MEHandler(D, CGF);
10513     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10514     llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10515 
10516     auto RI = CS.getCapturedRecordDecl()->field_begin();
10517     auto *CV = CapturedVars.begin();
10518     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10519                                               CE = CS.capture_end();
10520          CI != CE; ++CI, ++RI, ++CV) {
10521       MappableExprsHandler::MapCombinedInfoTy CurInfo;
10522       MappableExprsHandler::StructRangeInfoTy PartialStruct;
10523 
10524       // VLA sizes are passed to the outlined region by copy and do not have map
10525       // information associated.
10526       if (CI->capturesVariableArrayType()) {
10527         CurInfo.Exprs.push_back(nullptr);
10528         CurInfo.BasePointers.push_back(*CV);
10529         CurInfo.Pointers.push_back(*CV);
10530         CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10531             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10532         // Copy to the device as an argument. No need to retrieve it.
10533         CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
10534                                 MappableExprsHandler::OMP_MAP_TARGET_PARAM |
10535                                 MappableExprsHandler::OMP_MAP_IMPLICIT);
10536         CurInfo.Mappers.push_back(nullptr);
10537       } else {
10538         // If we have any information in the map clause, we use it, otherwise we
10539         // just do a default mapping.
10540         MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
10541         if (!CI->capturesThis())
10542           MappedVarSet.insert(CI->getCapturedVar());
10543         else
10544           MappedVarSet.insert(nullptr);
10545         if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
10546           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10547         // Generate correct mapping for variables captured by reference in
10548         // lambdas.
10549         if (CI->capturesVariable())
10550           MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10551                                                   CurInfo, LambdaPointers);
10552       }
10553       // We expect to have at least an element of information for this capture.
10554       assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
10555              "Non-existing map pointer for capture!");
10556       assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10557              CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10558              CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10559              CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10560              "Inconsistent map information sizes!");
10561 
10562       // If there is an entry in PartialStruct it means we have a struct with
10563       // individual members mapped. Emit an extra combined entry.
10564       if (PartialStruct.Base.isValid()) {
10565         CombinedInfo.append(PartialStruct.PreliminaryMapData);
10566         MEHandler.emitCombinedEntry(
10567             CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
10568             !PartialStruct.PreliminaryMapData.BasePointers.empty());
10569       }
10570 
10571       // We need to append the results of this capture to what we already have.
10572       CombinedInfo.append(CurInfo);
10573     }
10574     // Adjust MEMBER_OF flags for the lambdas captures.
10575     MEHandler.adjustMemberOfForLambdaCaptures(
10576         LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
10577         CombinedInfo.Types);
10578     // Map any list items in a map clause that were not captures because they
10579     // weren't referenced within the construct.
10580     MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
10581 
10582     TargetDataInfo Info;
10583     // Fill up the arrays and create the arguments.
10584     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
10585     emitOffloadingArraysArgument(
10586         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
10587         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
10588         {/*ForEndTask=*/false});
10589 
10590     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10591     InputInfo.BasePointersArray =
10592         Address(Info.BasePointersArray, CGM.getPointerAlign());
10593     InputInfo.PointersArray =
10594         Address(Info.PointersArray, CGM.getPointerAlign());
10595     InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
10596     InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
10597     MapTypesArray = Info.MapTypesArray;
10598     MapNamesArray = Info.MapNamesArray;
10599     if (RequiresOuterTask)
10600       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10601     else
10602       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10603   };
10604 
10605   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10606                              CodeGenFunction &CGF, PrePostActionTy &) {
10607     if (RequiresOuterTask) {
10608       CodeGenFunction::OMPTargetDataInfo InputInfo;
10609       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10610     } else {
10611       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10612     }
10613   };
10614 
10615   // If we have a target function ID it means that we need to support
10616   // offloading, otherwise, just execute on the host. We need to execute on host
10617   // regardless of the conditional in the if clause if, e.g., the user do not
10618   // specify target triples.
10619   if (OutlinedFnID) {
10620     if (IfCond) {
10621       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10622     } else {
10623       RegionCodeGenTy ThenRCG(TargetThenGen);
10624       ThenRCG(CGF);
10625     }
10626   } else {
10627     RegionCodeGenTy ElseRCG(TargetElseGen);
10628     ElseRCG(CGF);
10629   }
10630 }
10631 
10632 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10633                                                     StringRef ParentName) {
10634   if (!S)
10635     return;
10636 
10637   // Codegen OMP target directives that offload compute to the device.
10638   bool RequiresDeviceCodegen =
10639       isa<OMPExecutableDirective>(S) &&
10640       isOpenMPTargetExecutionDirective(
10641           cast<OMPExecutableDirective>(S)->getDirectiveKind());
10642 
10643   if (RequiresDeviceCodegen) {
10644     const auto &E = *cast<OMPExecutableDirective>(S);
10645     unsigned DeviceID;
10646     unsigned FileID;
10647     unsigned Line;
10648     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
10649                              FileID, Line);
10650 
10651     // Is this a target region that should not be emitted as an entry point? If
10652     // so just signal we are done with this target region.
10653     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
10654                                                             ParentName, Line))
10655       return;
10656 
10657     switch (E.getDirectiveKind()) {
10658     case OMPD_target:
10659       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
10660                                                    cast<OMPTargetDirective>(E));
10661       break;
10662     case OMPD_target_parallel:
10663       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
10664           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
10665       break;
10666     case OMPD_target_teams:
10667       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
10668           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
10669       break;
10670     case OMPD_target_teams_distribute:
10671       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
10672           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
10673       break;
10674     case OMPD_target_teams_distribute_simd:
10675       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
10676           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
10677       break;
10678     case OMPD_target_parallel_for:
10679       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
10680           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
10681       break;
10682     case OMPD_target_parallel_for_simd:
10683       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
10684           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
10685       break;
10686     case OMPD_target_simd:
10687       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
10688           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
10689       break;
10690     case OMPD_target_teams_distribute_parallel_for:
10691       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
10692           CGM, ParentName,
10693           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
10694       break;
10695     case OMPD_target_teams_distribute_parallel_for_simd:
10696       CodeGenFunction::
10697           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
10698               CGM, ParentName,
10699               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
10700       break;
10701     case OMPD_parallel:
10702     case OMPD_for:
10703     case OMPD_parallel_for:
10704     case OMPD_parallel_master:
10705     case OMPD_parallel_sections:
10706     case OMPD_for_simd:
10707     case OMPD_parallel_for_simd:
10708     case OMPD_cancel:
10709     case OMPD_cancellation_point:
10710     case OMPD_ordered:
10711     case OMPD_threadprivate:
10712     case OMPD_allocate:
10713     case OMPD_task:
10714     case OMPD_simd:
10715     case OMPD_tile:
10716     case OMPD_unroll:
10717     case OMPD_sections:
10718     case OMPD_section:
10719     case OMPD_single:
10720     case OMPD_master:
10721     case OMPD_critical:
10722     case OMPD_taskyield:
10723     case OMPD_barrier:
10724     case OMPD_taskwait:
10725     case OMPD_taskgroup:
10726     case OMPD_atomic:
10727     case OMPD_flush:
10728     case OMPD_depobj:
10729     case OMPD_scan:
10730     case OMPD_teams:
10731     case OMPD_target_data:
10732     case OMPD_target_exit_data:
10733     case OMPD_target_enter_data:
10734     case OMPD_distribute:
10735     case OMPD_distribute_simd:
10736     case OMPD_distribute_parallel_for:
10737     case OMPD_distribute_parallel_for_simd:
10738     case OMPD_teams_distribute:
10739     case OMPD_teams_distribute_simd:
10740     case OMPD_teams_distribute_parallel_for:
10741     case OMPD_teams_distribute_parallel_for_simd:
10742     case OMPD_target_update:
10743     case OMPD_declare_simd:
10744     case OMPD_declare_variant:
10745     case OMPD_begin_declare_variant:
10746     case OMPD_end_declare_variant:
10747     case OMPD_declare_target:
10748     case OMPD_end_declare_target:
10749     case OMPD_declare_reduction:
10750     case OMPD_declare_mapper:
10751     case OMPD_taskloop:
10752     case OMPD_taskloop_simd:
10753     case OMPD_master_taskloop:
10754     case OMPD_master_taskloop_simd:
10755     case OMPD_parallel_master_taskloop:
10756     case OMPD_parallel_master_taskloop_simd:
10757     case OMPD_requires:
10758     case OMPD_metadirective:
10759     case OMPD_unknown:
10760     default:
10761       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10762     }
10763     return;
10764   }
10765 
10766   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10767     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10768       return;
10769 
10770     scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10771     return;
10772   }
10773 
10774   // If this is a lambda function, look into its body.
10775   if (const auto *L = dyn_cast<LambdaExpr>(S))
10776     S = L->getBody();
10777 
10778   // Keep looking for target regions recursively.
10779   for (const Stmt *II : S->children())
10780     scanForTargetRegionsFunctions(II, ParentName);
10781 }
10782 
10783 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10784   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10785       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10786   if (!DevTy)
10787     return false;
10788   // Do not emit device_type(nohost) functions for the host.
10789   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10790     return true;
10791   // Do not emit device_type(host) functions for the device.
10792   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10793     return true;
10794   return false;
10795 }
10796 
10797 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10798   // If emitting code for the host, we do not process FD here. Instead we do
10799   // the normal code generation.
10800   if (!CGM.getLangOpts().OpenMPIsDevice) {
10801     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10802       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10803                                   CGM.getLangOpts().OpenMPIsDevice))
10804         return true;
10805     return false;
10806   }
10807 
10808   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10809   // Try to detect target regions in the function.
10810   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10811     StringRef Name = CGM.getMangledName(GD);
10812     scanForTargetRegionsFunctions(FD->getBody(), Name);
10813     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10814                                 CGM.getLangOpts().OpenMPIsDevice))
10815       return true;
10816   }
10817 
10818   // Do not to emit function if it is not marked as declare target.
10819   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10820          AlreadyEmittedTargetDecls.count(VD) == 0;
10821 }
10822 
10823 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10824   if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10825                               CGM.getLangOpts().OpenMPIsDevice))
10826     return true;
10827 
10828   if (!CGM.getLangOpts().OpenMPIsDevice)
10829     return false;
10830 
10831   // Check if there are Ctors/Dtors in this declaration and look for target
10832   // regions in it. We use the complete variant to produce the kernel name
10833   // mangling.
10834   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10835   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10836     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10837       StringRef ParentName =
10838           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10839       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10840     }
10841     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10842       StringRef ParentName =
10843           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10844       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10845     }
10846   }
10847 
10848   // Do not to emit variable if it is not marked as declare target.
10849   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10850       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10851           cast<VarDecl>(GD.getDecl()));
10852   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10853       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10854        HasRequiresUnifiedSharedMemory)) {
10855     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10856     return true;
10857   }
10858   return false;
10859 }
10860 
10861 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10862                                                    llvm::Constant *Addr) {
10863   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10864       !CGM.getLangOpts().OpenMPIsDevice)
10865     return;
10866 
10867   // If we have host/nohost variables, they do not need to be registered.
10868   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10869       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10870   if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
10871     return;
10872 
10873   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10874       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10875   if (!Res) {
10876     if (CGM.getLangOpts().OpenMPIsDevice) {
10877       // Register non-target variables being emitted in device code (debug info
10878       // may cause this).
10879       StringRef VarName = CGM.getMangledName(VD);
10880       EmittedNonTargetVariables.try_emplace(VarName, Addr);
10881     }
10882     return;
10883   }
10884   // Register declare target variables.
10885   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10886   StringRef VarName;
10887   CharUnits VarSize;
10888   llvm::GlobalValue::LinkageTypes Linkage;
10889 
10890   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10891       !HasRequiresUnifiedSharedMemory) {
10892     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10893     VarName = CGM.getMangledName(VD);
10894     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10895       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10896       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10897     } else {
10898       VarSize = CharUnits::Zero();
10899     }
10900     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10901     // Temp solution to prevent optimizations of the internal variables.
10902     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10903       // Do not create a "ref-variable" if the original is not also available
10904       // on the host.
10905       if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
10906         return;
10907       std::string RefName = getName({VarName, "ref"});
10908       if (!CGM.GetGlobalValue(RefName)) {
10909         llvm::Constant *AddrRef =
10910             getOrCreateInternalVariable(Addr->getType(), RefName);
10911         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10912         GVAddrRef->setConstant(/*Val=*/true);
10913         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10914         GVAddrRef->setInitializer(Addr);
10915         CGM.addCompilerUsedGlobal(GVAddrRef);
10916       }
10917     }
10918   } else {
10919     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10920             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10921              HasRequiresUnifiedSharedMemory)) &&
10922            "Declare target attribute must link or to with unified memory.");
10923     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10924       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10925     else
10926       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10927 
10928     if (CGM.getLangOpts().OpenMPIsDevice) {
10929       VarName = Addr->getName();
10930       Addr = nullptr;
10931     } else {
10932       VarName = getAddrOfDeclareTargetVar(VD).getName();
10933       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10934     }
10935     VarSize = CGM.getPointerSize();
10936     Linkage = llvm::GlobalValue::WeakAnyLinkage;
10937   }
10938 
10939   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10940       VarName, Addr, VarSize, Flags, Linkage);
10941 }
10942 
10943 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10944   if (isa<FunctionDecl>(GD.getDecl()) ||
10945       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10946     return emitTargetFunctions(GD);
10947 
10948   return emitTargetGlobalVariable(GD);
10949 }
10950 
10951 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10952   for (const VarDecl *VD : DeferredGlobalVariables) {
10953     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10954         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10955     if (!Res)
10956       continue;
10957     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10958         !HasRequiresUnifiedSharedMemory) {
10959       CGM.EmitGlobal(VD);
10960     } else {
10961       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10962               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10963                HasRequiresUnifiedSharedMemory)) &&
10964              "Expected link clause or to clause with unified memory.");
10965       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10966     }
10967   }
10968 }
10969 
10970 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10971     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10972   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10973          " Expected target-based directive.");
10974 }
10975 
10976 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10977   for (const OMPClause *Clause : D->clauselists()) {
10978     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10979       HasRequiresUnifiedSharedMemory = true;
10980     } else if (const auto *AC =
10981                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10982       switch (AC->getAtomicDefaultMemOrderKind()) {
10983       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10984         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10985         break;
10986       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10987         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10988         break;
10989       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10990         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10991         break;
10992       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10993         break;
10994       }
10995     }
10996   }
10997 }
10998 
10999 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
11000   return RequiresAtomicOrdering;
11001 }
11002 
11003 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
11004                                                        LangAS &AS) {
11005   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
11006     return false;
11007   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
11008   switch(A->getAllocatorType()) {
11009   case OMPAllocateDeclAttr::OMPNullMemAlloc:
11010   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
11011   // Not supported, fallback to the default mem space.
11012   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
11013   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
11014   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
11015   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
11016   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
11017   case OMPAllocateDeclAttr::OMPConstMemAlloc:
11018   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
11019     AS = LangAS::Default;
11020     return true;
11021   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
11022     llvm_unreachable("Expected predefined allocator for the variables with the "
11023                      "static storage.");
11024   }
11025   return false;
11026 }
11027 
11028 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
11029   return HasRequiresUnifiedSharedMemory;
11030 }
11031 
11032 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
11033     CodeGenModule &CGM)
11034     : CGM(CGM) {
11035   if (CGM.getLangOpts().OpenMPIsDevice) {
11036     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
11037     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
11038   }
11039 }
11040 
11041 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
11042   if (CGM.getLangOpts().OpenMPIsDevice)
11043     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
11044 }
11045 
11046 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
11047   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
11048     return true;
11049 
11050   const auto *D = cast<FunctionDecl>(GD.getDecl());
11051   // Do not to emit function if it is marked as declare target as it was already
11052   // emitted.
11053   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
11054     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
11055       if (auto *F = dyn_cast_or_null<llvm::Function>(
11056               CGM.GetGlobalValue(CGM.getMangledName(GD))))
11057         return !F->isDeclaration();
11058       return false;
11059     }
11060     return true;
11061   }
11062 
11063   return !AlreadyEmittedTargetDecls.insert(D).second;
11064 }
11065 
11066 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
11067   // If we don't have entries or if we are emitting code for the device, we
11068   // don't need to do anything.
11069   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
11070       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
11071       (OffloadEntriesInfoManager.empty() &&
11072        !HasEmittedDeclareTargetRegion &&
11073        !HasEmittedTargetRegion))
11074     return nullptr;
11075 
11076   // Create and register the function that handles the requires directives.
11077   ASTContext &C = CGM.getContext();
11078 
11079   llvm::Function *RequiresRegFn;
11080   {
11081     CodeGenFunction CGF(CGM);
11082     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
11083     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
11084     std::string ReqName = getName({"omp_offloading", "requires_reg"});
11085     RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
11086     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
11087     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
11088     // TODO: check for other requires clauses.
11089     // The requires directive takes effect only when a target region is
11090     // present in the compilation unit. Otherwise it is ignored and not
11091     // passed to the runtime. This avoids the runtime from throwing an error
11092     // for mismatching requires clauses across compilation units that don't
11093     // contain at least 1 target region.
11094     assert((HasEmittedTargetRegion ||
11095             HasEmittedDeclareTargetRegion ||
11096             !OffloadEntriesInfoManager.empty()) &&
11097            "Target or declare target region expected.");
11098     if (HasRequiresUnifiedSharedMemory)
11099       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
11100     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11101                             CGM.getModule(), OMPRTL___tgt_register_requires),
11102                         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
11103     CGF.FinishFunction();
11104   }
11105   return RequiresRegFn;
11106 }
11107 
11108 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
11109                                     const OMPExecutableDirective &D,
11110                                     SourceLocation Loc,
11111                                     llvm::Function *OutlinedFn,
11112                                     ArrayRef<llvm::Value *> CapturedVars) {
11113   if (!CGF.HaveInsertPoint())
11114     return;
11115 
11116   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11117   CodeGenFunction::RunCleanupsScope Scope(CGF);
11118 
11119   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11120   llvm::Value *Args[] = {
11121       RTLoc,
11122       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11123       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
11124   llvm::SmallVector<llvm::Value *, 16> RealArgs;
11125   RealArgs.append(std::begin(Args), std::end(Args));
11126   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11127 
11128   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11129       CGM.getModule(), OMPRTL___kmpc_fork_teams);
11130   CGF.EmitRuntimeCall(RTLFn, RealArgs);
11131 }
11132 
11133 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11134                                          const Expr *NumTeams,
11135                                          const Expr *ThreadLimit,
11136                                          SourceLocation Loc) {
11137   if (!CGF.HaveInsertPoint())
11138     return;
11139 
11140   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11141 
11142   llvm::Value *NumTeamsVal =
11143       NumTeams
11144           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
11145                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11146           : CGF.Builder.getInt32(0);
11147 
11148   llvm::Value *ThreadLimitVal =
11149       ThreadLimit
11150           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11151                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11152           : CGF.Builder.getInt32(0);
11153 
11154   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
11155   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
11156                                      ThreadLimitVal};
11157   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11158                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
11159                       PushNumTeamsArgs);
11160 }
11161 
11162 void CGOpenMPRuntime::emitTargetDataCalls(
11163     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11164     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
11165   if (!CGF.HaveInsertPoint())
11166     return;
11167 
11168   // Action used to replace the default codegen action and turn privatization
11169   // off.
11170   PrePostActionTy NoPrivAction;
11171 
11172   // Generate the code for the opening of the data environment. Capture all the
11173   // arguments of the runtime call by reference because they are used in the
11174   // closing of the region.
11175   auto &&BeginThenGen = [this, &D, Device, &Info,
11176                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
11177     // Fill up the arrays with all the mapped variables.
11178     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11179 
11180     // Get map clause information.
11181     MappableExprsHandler MEHandler(D, CGF);
11182     MEHandler.generateAllInfo(CombinedInfo);
11183 
11184     // Fill up the arrays and create the arguments.
11185     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11186                          /*IsNonContiguous=*/true);
11187 
11188     llvm::Value *BasePointersArrayArg = nullptr;
11189     llvm::Value *PointersArrayArg = nullptr;
11190     llvm::Value *SizesArrayArg = nullptr;
11191     llvm::Value *MapTypesArrayArg = nullptr;
11192     llvm::Value *MapNamesArrayArg = nullptr;
11193     llvm::Value *MappersArrayArg = nullptr;
11194     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11195                                  SizesArrayArg, MapTypesArrayArg,
11196                                  MapNamesArrayArg, MappersArrayArg, Info);
11197 
11198     // Emit device ID if any.
11199     llvm::Value *DeviceID = nullptr;
11200     if (Device) {
11201       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11202                                            CGF.Int64Ty, /*isSigned=*/true);
11203     } else {
11204       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11205     }
11206 
11207     // Emit the number of elements in the offloading arrays.
11208     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11209     //
11210     // Source location for the ident struct
11211     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11212 
11213     llvm::Value *OffloadingArgs[] = {RTLoc,
11214                                      DeviceID,
11215                                      PointerNum,
11216                                      BasePointersArrayArg,
11217                                      PointersArrayArg,
11218                                      SizesArrayArg,
11219                                      MapTypesArrayArg,
11220                                      MapNamesArrayArg,
11221                                      MappersArrayArg};
11222     CGF.EmitRuntimeCall(
11223         OMPBuilder.getOrCreateRuntimeFunction(
11224             CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
11225         OffloadingArgs);
11226 
11227     // If device pointer privatization is required, emit the body of the region
11228     // here. It will have to be duplicated: with and without privatization.
11229     if (!Info.CaptureDeviceAddrMap.empty())
11230       CodeGen(CGF);
11231   };
11232 
11233   // Generate code for the closing of the data region.
11234   auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
11235                                                 PrePostActionTy &) {
11236     assert(Info.isValid() && "Invalid data environment closing arguments.");
11237 
11238     llvm::Value *BasePointersArrayArg = nullptr;
11239     llvm::Value *PointersArrayArg = nullptr;
11240     llvm::Value *SizesArrayArg = nullptr;
11241     llvm::Value *MapTypesArrayArg = nullptr;
11242     llvm::Value *MapNamesArrayArg = nullptr;
11243     llvm::Value *MappersArrayArg = nullptr;
11244     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11245                                  SizesArrayArg, MapTypesArrayArg,
11246                                  MapNamesArrayArg, MappersArrayArg, Info,
11247                                  {/*ForEndCall=*/true});
11248 
11249     // Emit device ID if any.
11250     llvm::Value *DeviceID = nullptr;
11251     if (Device) {
11252       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11253                                            CGF.Int64Ty, /*isSigned=*/true);
11254     } else {
11255       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11256     }
11257 
11258     // Emit the number of elements in the offloading arrays.
11259     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11260 
11261     // Source location for the ident struct
11262     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11263 
11264     llvm::Value *OffloadingArgs[] = {RTLoc,
11265                                      DeviceID,
11266                                      PointerNum,
11267                                      BasePointersArrayArg,
11268                                      PointersArrayArg,
11269                                      SizesArrayArg,
11270                                      MapTypesArrayArg,
11271                                      MapNamesArrayArg,
11272                                      MappersArrayArg};
11273     CGF.EmitRuntimeCall(
11274         OMPBuilder.getOrCreateRuntimeFunction(
11275             CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
11276         OffloadingArgs);
11277   };
11278 
11279   // If we need device pointer privatization, we need to emit the body of the
11280   // region with no privatization in the 'else' branch of the conditional.
11281   // Otherwise, we don't have to do anything.
11282   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
11283                                                          PrePostActionTy &) {
11284     if (!Info.CaptureDeviceAddrMap.empty()) {
11285       CodeGen.setAction(NoPrivAction);
11286       CodeGen(CGF);
11287     }
11288   };
11289 
11290   // We don't have to do anything to close the region if the if clause evaluates
11291   // to false.
11292   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
11293 
11294   if (IfCond) {
11295     emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
11296   } else {
11297     RegionCodeGenTy RCG(BeginThenGen);
11298     RCG(CGF);
11299   }
11300 
11301   // If we don't require privatization of device pointers, we emit the body in
11302   // between the runtime calls. This avoids duplicating the body code.
11303   if (Info.CaptureDeviceAddrMap.empty()) {
11304     CodeGen.setAction(NoPrivAction);
11305     CodeGen(CGF);
11306   }
11307 
11308   if (IfCond) {
11309     emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
11310   } else {
11311     RegionCodeGenTy RCG(EndThenGen);
11312     RCG(CGF);
11313   }
11314 }
11315 
11316 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
11317     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11318     const Expr *Device) {
11319   if (!CGF.HaveInsertPoint())
11320     return;
11321 
11322   assert((isa<OMPTargetEnterDataDirective>(D) ||
11323           isa<OMPTargetExitDataDirective>(D) ||
11324           isa<OMPTargetUpdateDirective>(D)) &&
11325          "Expecting either target enter, exit data, or update directives.");
11326 
11327   CodeGenFunction::OMPTargetDataInfo InputInfo;
11328   llvm::Value *MapTypesArray = nullptr;
11329   llvm::Value *MapNamesArray = nullptr;
11330   // Generate the code for the opening of the data environment.
11331   auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
11332                     &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
11333     // Emit device ID if any.
11334     llvm::Value *DeviceID = nullptr;
11335     if (Device) {
11336       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11337                                            CGF.Int64Ty, /*isSigned=*/true);
11338     } else {
11339       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11340     }
11341 
11342     // Emit the number of elements in the offloading arrays.
11343     llvm::Constant *PointerNum =
11344         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
11345 
11346     // Source location for the ident struct
11347     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11348 
11349     llvm::Value *OffloadingArgs[] = {RTLoc,
11350                                      DeviceID,
11351                                      PointerNum,
11352                                      InputInfo.BasePointersArray.getPointer(),
11353                                      InputInfo.PointersArray.getPointer(),
11354                                      InputInfo.SizesArray.getPointer(),
11355                                      MapTypesArray,
11356                                      MapNamesArray,
11357                                      InputInfo.MappersArray.getPointer()};
11358 
11359     // Select the right runtime function call for each standalone
11360     // directive.
11361     const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
11362     RuntimeFunction RTLFn;
11363     switch (D.getDirectiveKind()) {
11364     case OMPD_target_enter_data:
11365       RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
11366                         : OMPRTL___tgt_target_data_begin_mapper;
11367       break;
11368     case OMPD_target_exit_data:
11369       RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
11370                         : OMPRTL___tgt_target_data_end_mapper;
11371       break;
11372     case OMPD_target_update:
11373       RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
11374                         : OMPRTL___tgt_target_data_update_mapper;
11375       break;
11376     case OMPD_parallel:
11377     case OMPD_for:
11378     case OMPD_parallel_for:
11379     case OMPD_parallel_master:
11380     case OMPD_parallel_sections:
11381     case OMPD_for_simd:
11382     case OMPD_parallel_for_simd:
11383     case OMPD_cancel:
11384     case OMPD_cancellation_point:
11385     case OMPD_ordered:
11386     case OMPD_threadprivate:
11387     case OMPD_allocate:
11388     case OMPD_task:
11389     case OMPD_simd:
11390     case OMPD_tile:
11391     case OMPD_unroll:
11392     case OMPD_sections:
11393     case OMPD_section:
11394     case OMPD_single:
11395     case OMPD_master:
11396     case OMPD_critical:
11397     case OMPD_taskyield:
11398     case OMPD_barrier:
11399     case OMPD_taskwait:
11400     case OMPD_taskgroup:
11401     case OMPD_atomic:
11402     case OMPD_flush:
11403     case OMPD_depobj:
11404     case OMPD_scan:
11405     case OMPD_teams:
11406     case OMPD_target_data:
11407     case OMPD_distribute:
11408     case OMPD_distribute_simd:
11409     case OMPD_distribute_parallel_for:
11410     case OMPD_distribute_parallel_for_simd:
11411     case OMPD_teams_distribute:
11412     case OMPD_teams_distribute_simd:
11413     case OMPD_teams_distribute_parallel_for:
11414     case OMPD_teams_distribute_parallel_for_simd:
11415     case OMPD_declare_simd:
11416     case OMPD_declare_variant:
11417     case OMPD_begin_declare_variant:
11418     case OMPD_end_declare_variant:
11419     case OMPD_declare_target:
11420     case OMPD_end_declare_target:
11421     case OMPD_declare_reduction:
11422     case OMPD_declare_mapper:
11423     case OMPD_taskloop:
11424     case OMPD_taskloop_simd:
11425     case OMPD_master_taskloop:
11426     case OMPD_master_taskloop_simd:
11427     case OMPD_parallel_master_taskloop:
11428     case OMPD_parallel_master_taskloop_simd:
11429     case OMPD_target:
11430     case OMPD_target_simd:
11431     case OMPD_target_teams_distribute:
11432     case OMPD_target_teams_distribute_simd:
11433     case OMPD_target_teams_distribute_parallel_for:
11434     case OMPD_target_teams_distribute_parallel_for_simd:
11435     case OMPD_target_teams:
11436     case OMPD_target_parallel:
11437     case OMPD_target_parallel_for:
11438     case OMPD_target_parallel_for_simd:
11439     case OMPD_requires:
11440     case OMPD_metadirective:
11441     case OMPD_unknown:
11442     default:
11443       llvm_unreachable("Unexpected standalone target data directive.");
11444       break;
11445     }
11446     CGF.EmitRuntimeCall(
11447         OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
11448         OffloadingArgs);
11449   };
11450 
11451   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
11452                           &MapNamesArray](CodeGenFunction &CGF,
11453                                           PrePostActionTy &) {
11454     // Fill up the arrays with all the mapped variables.
11455     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11456 
11457     // Get map clause information.
11458     MappableExprsHandler MEHandler(D, CGF);
11459     MEHandler.generateAllInfo(CombinedInfo);
11460 
11461     TargetDataInfo Info;
11462     // Fill up the arrays and create the arguments.
11463     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11464                          /*IsNonContiguous=*/true);
11465     bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
11466                              D.hasClausesOfKind<OMPNowaitClause>();
11467     emitOffloadingArraysArgument(
11468         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
11469         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
11470         {/*ForEndTask=*/false});
11471     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
11472     InputInfo.BasePointersArray =
11473         Address(Info.BasePointersArray, CGM.getPointerAlign());
11474     InputInfo.PointersArray =
11475         Address(Info.PointersArray, CGM.getPointerAlign());
11476     InputInfo.SizesArray =
11477         Address(Info.SizesArray, CGM.getPointerAlign());
11478     InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
11479     MapTypesArray = Info.MapTypesArray;
11480     MapNamesArray = Info.MapNamesArray;
11481     if (RequiresOuterTask)
11482       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
11483     else
11484       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
11485   };
11486 
11487   if (IfCond) {
11488     emitIfClause(CGF, IfCond, TargetThenGen,
11489                  [](CodeGenFunction &CGF, PrePostActionTy &) {});
11490   } else {
11491     RegionCodeGenTy ThenRCG(TargetThenGen);
11492     ThenRCG(CGF);
11493   }
11494 }
11495 
11496 namespace {
11497   /// Kind of parameter in a function with 'declare simd' directive.
11498   enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
11499   /// Attribute set of the parameter.
11500   struct ParamAttrTy {
11501     ParamKindTy Kind = Vector;
11502     llvm::APSInt StrideOrArg;
11503     llvm::APSInt Alignment;
11504   };
11505 } // namespace
11506 
11507 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11508                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11509   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11510   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11511   // of that clause. The VLEN value must be power of 2.
11512   // In other case the notion of the function`s "characteristic data type" (CDT)
11513   // is used to compute the vector length.
11514   // CDT is defined in the following order:
11515   //   a) For non-void function, the CDT is the return type.
11516   //   b) If the function has any non-uniform, non-linear parameters, then the
11517   //   CDT is the type of the first such parameter.
11518   //   c) If the CDT determined by a) or b) above is struct, union, or class
11519   //   type which is pass-by-value (except for the type that maps to the
11520   //   built-in complex data type), the characteristic data type is int.
11521   //   d) If none of the above three cases is applicable, the CDT is int.
11522   // The VLEN is then determined based on the CDT and the size of vector
11523   // register of that ISA for which current vector version is generated. The
11524   // VLEN is computed using the formula below:
11525   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11526   // where vector register size specified in section 3.2.1 Registers and the
11527   // Stack Frame of original AMD64 ABI document.
11528   QualType RetType = FD->getReturnType();
11529   if (RetType.isNull())
11530     return 0;
11531   ASTContext &C = FD->getASTContext();
11532   QualType CDT;
11533   if (!RetType.isNull() && !RetType->isVoidType()) {
11534     CDT = RetType;
11535   } else {
11536     unsigned Offset = 0;
11537     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11538       if (ParamAttrs[Offset].Kind == Vector)
11539         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11540       ++Offset;
11541     }
11542     if (CDT.isNull()) {
11543       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11544         if (ParamAttrs[I + Offset].Kind == Vector) {
11545           CDT = FD->getParamDecl(I)->getType();
11546           break;
11547         }
11548       }
11549     }
11550   }
11551   if (CDT.isNull())
11552     CDT = C.IntTy;
11553   CDT = CDT->getCanonicalTypeUnqualified();
11554   if (CDT->isRecordType() || CDT->isUnionType())
11555     CDT = C.IntTy;
11556   return C.getTypeSize(CDT);
11557 }
11558 
11559 static void
11560 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11561                            const llvm::APSInt &VLENVal,
11562                            ArrayRef<ParamAttrTy> ParamAttrs,
11563                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11564   struct ISADataTy {
11565     char ISA;
11566     unsigned VecRegSize;
11567   };
11568   ISADataTy ISAData[] = {
11569       {
11570           'b', 128
11571       }, // SSE
11572       {
11573           'c', 256
11574       }, // AVX
11575       {
11576           'd', 256
11577       }, // AVX2
11578       {
11579           'e', 512
11580       }, // AVX512
11581   };
11582   llvm::SmallVector<char, 2> Masked;
11583   switch (State) {
11584   case OMPDeclareSimdDeclAttr::BS_Undefined:
11585     Masked.push_back('N');
11586     Masked.push_back('M');
11587     break;
11588   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11589     Masked.push_back('N');
11590     break;
11591   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11592     Masked.push_back('M');
11593     break;
11594   }
11595   for (char Mask : Masked) {
11596     for (const ISADataTy &Data : ISAData) {
11597       SmallString<256> Buffer;
11598       llvm::raw_svector_ostream Out(Buffer);
11599       Out << "_ZGV" << Data.ISA << Mask;
11600       if (!VLENVal) {
11601         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11602         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11603         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11604       } else {
11605         Out << VLENVal;
11606       }
11607       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11608         switch (ParamAttr.Kind){
11609         case LinearWithVarStride:
11610           Out << 's' << ParamAttr.StrideOrArg;
11611           break;
11612         case Linear:
11613           Out << 'l';
11614           if (ParamAttr.StrideOrArg != 1)
11615             Out << ParamAttr.StrideOrArg;
11616           break;
11617         case Uniform:
11618           Out << 'u';
11619           break;
11620         case Vector:
11621           Out << 'v';
11622           break;
11623         }
11624         if (!!ParamAttr.Alignment)
11625           Out << 'a' << ParamAttr.Alignment;
11626       }
11627       Out << '_' << Fn->getName();
11628       Fn->addFnAttr(Out.str());
11629     }
11630   }
11631 }
11632 
// These are the functions that are needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11638 
11639 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11640 ///
11641 /// TODO: Need to implement the behavior for reference marked with a
11642 /// var or no linear modifiers (1.b in the section). For this, we
11643 /// need to extend ParamKindTy to support the linear modifiers.
11644 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11645   QT = QT.getCanonicalType();
11646 
11647   if (QT->isVoidType())
11648     return false;
11649 
11650   if (Kind == ParamKindTy::Uniform)
11651     return false;
11652 
11653   if (Kind == ParamKindTy::Linear)
11654     return false;
11655 
11656   // TODO: Handle linear references with modifiers
11657 
11658   if (Kind == ParamKindTy::LinearWithVarStride)
11659     return false;
11660 
11661   return true;
11662 }
11663 
11664 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11665 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11666   QT = QT.getCanonicalType();
11667   unsigned Size = C.getTypeSize(QT);
11668 
11669   // Only scalars and complex within 16 bytes wide set PVB to true.
11670   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11671     return false;
11672 
11673   if (QT->isFloatingType())
11674     return true;
11675 
11676   if (QT->isIntegerType())
11677     return true;
11678 
11679   if (QT->isPointerType())
11680     return true;
11681 
11682   // TODO: Add support for complex types (section 3.1.2, item 2).
11683 
11684   return false;
11685 }
11686 
11687 /// Computes the lane size (LS) of a return type or of an input parameter,
11688 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11689 /// TODO: Add support for references, section 3.2.1, item 1.
11690 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11691   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11692     QualType PTy = QT.getCanonicalType()->getPointeeType();
11693     if (getAArch64PBV(PTy, C))
11694       return C.getTypeSize(PTy);
11695   }
11696   if (getAArch64PBV(QT, C))
11697     return C.getTypeSize(QT);
11698 
11699   return C.getTypeSize(C.getUIntPtrType());
11700 }
11701 
11702 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11703 // signature of the scalar function, as defined in 3.2.2 of the
11704 // AAVFABI.
11705 static std::tuple<unsigned, unsigned, bool>
11706 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11707   QualType RetType = FD->getReturnType().getCanonicalType();
11708 
11709   ASTContext &C = FD->getASTContext();
11710 
11711   bool OutputBecomesInput = false;
11712 
11713   llvm::SmallVector<unsigned, 8> Sizes;
11714   if (!RetType->isVoidType()) {
11715     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11716     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11717       OutputBecomesInput = true;
11718   }
11719   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11720     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11721     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11722   }
11723 
11724   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11725   // The LS of a function parameter / return value can only be a power
11726   // of 2, starting from 8 bits, up to 128.
11727   assert(std::all_of(Sizes.begin(), Sizes.end(),
11728                      [](unsigned Size) {
11729                        return Size == 8 || Size == 16 || Size == 32 ||
11730                               Size == 64 || Size == 128;
11731                      }) &&
11732          "Invalid size");
11733 
11734   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11735                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11736                          OutputBecomesInput);
11737 }
11738 
11739 /// Mangle the parameter part of the vector function name according to
11740 /// their OpenMP classification. The mangling function is defined in
11741 /// section 3.5 of the AAVFABI.
11742 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11743   SmallString<256> Buffer;
11744   llvm::raw_svector_ostream Out(Buffer);
11745   for (const auto &ParamAttr : ParamAttrs) {
11746     switch (ParamAttr.Kind) {
11747     case LinearWithVarStride:
11748       Out << "ls" << ParamAttr.StrideOrArg;
11749       break;
11750     case Linear:
11751       Out << 'l';
11752       // Don't print the step value if it is not present or if it is
11753       // equal to 1.
11754       if (ParamAttr.StrideOrArg != 1)
11755         Out << ParamAttr.StrideOrArg;
11756       break;
11757     case Uniform:
11758       Out << 'u';
11759       break;
11760     case Vector:
11761       Out << 'v';
11762       break;
11763     }
11764 
11765     if (!!ParamAttr.Alignment)
11766       Out << 'a' << ParamAttr.Alignment;
11767   }
11768 
11769   return std::string(Out.str());
11770 }
11771 
11772 // Function used to add the attribute. The parameter `VLEN` is
11773 // templated to allow the use of "x" when targeting scalable functions
11774 // for SVE.
11775 template <typename T>
11776 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11777                                  char ISA, StringRef ParSeq,
11778                                  StringRef MangledName, bool OutputBecomesInput,
11779                                  llvm::Function *Fn) {
11780   SmallString<256> Buffer;
11781   llvm::raw_svector_ostream Out(Buffer);
11782   Out << Prefix << ISA << LMask << VLEN;
11783   if (OutputBecomesInput)
11784     Out << "v";
11785   Out << ParSeq << "_" << MangledName;
11786   Fn->addFnAttr(Out.str());
11787 }
11788 
11789 // Helper function to generate the Advanced SIMD names depending on
11790 // the value of the NDS when simdlen is not present.
11791 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11792                                       StringRef Prefix, char ISA,
11793                                       StringRef ParSeq, StringRef MangledName,
11794                                       bool OutputBecomesInput,
11795                                       llvm::Function *Fn) {
11796   switch (NDS) {
11797   case 8:
11798     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11799                          OutputBecomesInput, Fn);
11800     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11801                          OutputBecomesInput, Fn);
11802     break;
11803   case 16:
11804     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11805                          OutputBecomesInput, Fn);
11806     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11807                          OutputBecomesInput, Fn);
11808     break;
11809   case 32:
11810     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11811                          OutputBecomesInput, Fn);
11812     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11813                          OutputBecomesInput, Fn);
11814     break;
11815   case 64:
11816   case 128:
11817     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11818                          OutputBecomesInput, Fn);
11819     break;
11820   default:
11821     llvm_unreachable("Scalar type is too wide.");
11822   }
11823 }
11824 
11825 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11826 static void emitAArch64DeclareSimdFunction(
11827     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11828     ArrayRef<ParamAttrTy> ParamAttrs,
11829     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11830     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11831 
11832   // Get basic data for building the vector signature.
11833   const auto Data = getNDSWDS(FD, ParamAttrs);
11834   const unsigned NDS = std::get<0>(Data);
11835   const unsigned WDS = std::get<1>(Data);
11836   const bool OutputBecomesInput = std::get<2>(Data);
11837 
11838   // Check the values provided via `simdlen` by the user.
11839   // 1. A `simdlen(1)` doesn't produce vector signatures,
11840   if (UserVLEN == 1) {
11841     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11842         DiagnosticsEngine::Warning,
11843         "The clause simdlen(1) has no effect when targeting aarch64.");
11844     CGM.getDiags().Report(SLoc, DiagID);
11845     return;
11846   }
11847 
11848   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11849   // Advanced SIMD output.
11850   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11851     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11852         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11853                                     "power of 2 when targeting Advanced SIMD.");
11854     CGM.getDiags().Report(SLoc, DiagID);
11855     return;
11856   }
11857 
11858   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
11859   // limits.
11860   if (ISA == 's' && UserVLEN != 0) {
11861     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11862       unsigned DiagID = CGM.getDiags().getCustomDiagID(
11863           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11864                                       "lanes in the architectural constraints "
11865                                       "for SVE (min is 128-bit, max is "
11866                                       "2048-bit, by steps of 128-bit)");
11867       CGM.getDiags().Report(SLoc, DiagID) << WDS;
11868       return;
11869     }
11870   }
11871 
11872   // Sort out parameter sequence.
11873   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11874   StringRef Prefix = "_ZGV";
11875   // Generate simdlen from user input (if any).
11876   if (UserVLEN) {
11877     if (ISA == 's') {
11878       // SVE generates only a masked function.
11879       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11880                            OutputBecomesInput, Fn);
11881     } else {
11882       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11883       // Advanced SIMD generates one or two functions, depending on
11884       // the `[not]inbranch` clause.
11885       switch (State) {
11886       case OMPDeclareSimdDeclAttr::BS_Undefined:
11887         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11888                              OutputBecomesInput, Fn);
11889         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11890                              OutputBecomesInput, Fn);
11891         break;
11892       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11893         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11894                              OutputBecomesInput, Fn);
11895         break;
11896       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11897         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11898                              OutputBecomesInput, Fn);
11899         break;
11900       }
11901     }
11902   } else {
11903     // If no user simdlen is provided, follow the AAVFABI rules for
11904     // generating the vector length.
11905     if (ISA == 's') {
11906       // SVE, section 3.4.1, item 1.
11907       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11908                            OutputBecomesInput, Fn);
11909     } else {
11910       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11911       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11912       // two vector names depending on the use of the clause
11913       // `[not]inbranch`.
11914       switch (State) {
11915       case OMPDeclareSimdDeclAttr::BS_Undefined:
11916         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11917                                   OutputBecomesInput, Fn);
11918         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11919                                   OutputBecomesInput, Fn);
11920         break;
11921       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11922         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11923                                   OutputBecomesInput, Fn);
11924         break;
11925       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11926         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11927                                   OutputBecomesInput, Fn);
11928         break;
11929       }
11930     }
11931   }
11932 }
11933 
11934 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11935                                               llvm::Function *Fn) {
11936   ASTContext &C = CGM.getContext();
11937   FD = FD->getMostRecentDecl();
11938   // Map params to their positions in function decl.
11939   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11940   if (isa<CXXMethodDecl>(FD))
11941     ParamPositions.try_emplace(FD, 0);
11942   unsigned ParamPos = ParamPositions.size();
11943   for (const ParmVarDecl *P : FD->parameters()) {
11944     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11945     ++ParamPos;
11946   }
11947   while (FD) {
11948     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11949       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11950       // Mark uniform parameters.
11951       for (const Expr *E : Attr->uniforms()) {
11952         E = E->IgnoreParenImpCasts();
11953         unsigned Pos;
11954         if (isa<CXXThisExpr>(E)) {
11955           Pos = ParamPositions[FD];
11956         } else {
11957           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11958                                 ->getCanonicalDecl();
11959           Pos = ParamPositions[PVD];
11960         }
11961         ParamAttrs[Pos].Kind = Uniform;
11962       }
11963       // Get alignment info.
11964       auto NI = Attr->alignments_begin();
11965       for (const Expr *E : Attr->aligneds()) {
11966         E = E->IgnoreParenImpCasts();
11967         unsigned Pos;
11968         QualType ParmTy;
11969         if (isa<CXXThisExpr>(E)) {
11970           Pos = ParamPositions[FD];
11971           ParmTy = E->getType();
11972         } else {
11973           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11974                                 ->getCanonicalDecl();
11975           Pos = ParamPositions[PVD];
11976           ParmTy = PVD->getType();
11977         }
11978         ParamAttrs[Pos].Alignment =
11979             (*NI)
11980                 ? (*NI)->EvaluateKnownConstInt(C)
11981                 : llvm::APSInt::getUnsigned(
11982                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11983                           .getQuantity());
11984         ++NI;
11985       }
11986       // Mark linear parameters.
11987       auto SI = Attr->steps_begin();
11988       auto MI = Attr->modifiers_begin();
11989       for (const Expr *E : Attr->linears()) {
11990         E = E->IgnoreParenImpCasts();
11991         unsigned Pos;
11992         // Rescaling factor needed to compute the linear parameter
11993         // value in the mangled name.
11994         unsigned PtrRescalingFactor = 1;
11995         if (isa<CXXThisExpr>(E)) {
11996           Pos = ParamPositions[FD];
11997         } else {
11998           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11999                                 ->getCanonicalDecl();
12000           Pos = ParamPositions[PVD];
12001           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
12002             PtrRescalingFactor = CGM.getContext()
12003                                      .getTypeSizeInChars(P->getPointeeType())
12004                                      .getQuantity();
12005         }
12006         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
12007         ParamAttr.Kind = Linear;
12008         // Assuming a stride of 1, for `linear` without modifiers.
12009         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
12010         if (*SI) {
12011           Expr::EvalResult Result;
12012           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
12013             if (const auto *DRE =
12014                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
12015               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
12016                 ParamAttr.Kind = LinearWithVarStride;
12017                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
12018                     ParamPositions[StridePVD->getCanonicalDecl()]);
12019               }
12020             }
12021           } else {
12022             ParamAttr.StrideOrArg = Result.Val.getInt();
12023           }
12024         }
12025         // If we are using a linear clause on a pointer, we need to
12026         // rescale the value of linear_step with the byte size of the
12027         // pointee type.
12028         if (Linear == ParamAttr.Kind)
12029           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
12030         ++SI;
12031         ++MI;
12032       }
12033       llvm::APSInt VLENVal;
12034       SourceLocation ExprLoc;
12035       const Expr *VLENExpr = Attr->getSimdlen();
12036       if (VLENExpr) {
12037         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
12038         ExprLoc = VLENExpr->getExprLoc();
12039       }
12040       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
12041       if (CGM.getTriple().isX86()) {
12042         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
12043       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
12044         unsigned VLEN = VLENVal.getExtValue();
12045         StringRef MangledName = Fn->getName();
12046         if (CGM.getTarget().hasFeature("sve"))
12047           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12048                                          MangledName, 's', 128, Fn, ExprLoc);
12049         if (CGM.getTarget().hasFeature("neon"))
12050           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12051                                          MangledName, 'n', 128, Fn, ExprLoc);
12052       }
12053     }
12054     FD = FD->getPreviousDecl();
12055   }
12056 }
12057 
12058 namespace {
12059 /// Cleanup action for doacross support.
12060 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
12061 public:
12062   static const int DoacrossFinArgs = 2;
12063 
12064 private:
12065   llvm::FunctionCallee RTLFn;
12066   llvm::Value *Args[DoacrossFinArgs];
12067 
12068 public:
12069   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
12070                     ArrayRef<llvm::Value *> CallArgs)
12071       : RTLFn(RTLFn) {
12072     assert(CallArgs.size() == DoacrossFinArgs);
12073     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
12074   }
12075   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12076     if (!CGF.HaveInsertPoint())
12077       return;
12078     CGF.EmitRuntimeCall(RTLFn, Args);
12079   }
12080 };
12081 } // namespace
12082 
12083 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12084                                        const OMPLoopDirective &D,
12085                                        ArrayRef<Expr *> NumIterations) {
12086   if (!CGF.HaveInsertPoint())
12087     return;
12088 
12089   ASTContext &C = CGM.getContext();
12090   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
12091   RecordDecl *RD;
12092   if (KmpDimTy.isNull()) {
12093     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
12094     //  kmp_int64 lo; // lower
12095     //  kmp_int64 up; // upper
12096     //  kmp_int64 st; // stride
12097     // };
12098     RD = C.buildImplicitRecord("kmp_dim");
12099     RD->startDefinition();
12100     addFieldToRecordDecl(C, RD, Int64Ty);
12101     addFieldToRecordDecl(C, RD, Int64Ty);
12102     addFieldToRecordDecl(C, RD, Int64Ty);
12103     RD->completeDefinition();
12104     KmpDimTy = C.getRecordType(RD);
12105   } else {
12106     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
12107   }
12108   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
12109   QualType ArrayTy =
12110       C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
12111 
12112   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
12113   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
12114   enum { LowerFD = 0, UpperFD, StrideFD };
12115   // Fill dims with data.
12116   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
12117     LValue DimsLVal = CGF.MakeAddrLValue(
12118         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
12119     // dims.upper = num_iterations;
12120     LValue UpperLVal = CGF.EmitLValueForField(
12121         DimsLVal, *std::next(RD->field_begin(), UpperFD));
12122     llvm::Value *NumIterVal = CGF.EmitScalarConversion(
12123         CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
12124         Int64Ty, NumIterations[I]->getExprLoc());
12125     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
12126     // dims.stride = 1;
12127     LValue StrideLVal = CGF.EmitLValueForField(
12128         DimsLVal, *std::next(RD->field_begin(), StrideFD));
12129     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
12130                           StrideLVal);
12131   }
12132 
12133   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
12134   // kmp_int32 num_dims, struct kmp_dim * dims);
12135   llvm::Value *Args[] = {
12136       emitUpdateLocation(CGF, D.getBeginLoc()),
12137       getThreadID(CGF, D.getBeginLoc()),
12138       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
12139       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12140           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
12141           CGM.VoidPtrTy)};
12142 
12143   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12144       CGM.getModule(), OMPRTL___kmpc_doacross_init);
12145   CGF.EmitRuntimeCall(RTLFn, Args);
12146   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
12147       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
12148   llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12149       CGM.getModule(), OMPRTL___kmpc_doacross_fini);
12150   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
12151                                              llvm::makeArrayRef(FiniArgs));
12152 }
12153 
12154 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12155                                           const OMPDependClause *C) {
12156   QualType Int64Ty =
12157       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
12158   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
12159   QualType ArrayTy = CGM.getContext().getConstantArrayType(
12160       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
12161   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
12162   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
12163     const Expr *CounterVal = C->getLoopData(I);
12164     assert(CounterVal);
12165     llvm::Value *CntVal = CGF.EmitScalarConversion(
12166         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
12167         CounterVal->getExprLoc());
12168     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
12169                           /*Volatile=*/false, Int64Ty);
12170   }
12171   llvm::Value *Args[] = {
12172       emitUpdateLocation(CGF, C->getBeginLoc()),
12173       getThreadID(CGF, C->getBeginLoc()),
12174       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
12175   llvm::FunctionCallee RTLFn;
12176   if (C->getDependencyKind() == OMPC_DEPEND_source) {
12177     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12178                                                   OMPRTL___kmpc_doacross_post);
12179   } else {
12180     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
12181     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12182                                                   OMPRTL___kmpc_doacross_wait);
12183   }
12184   CGF.EmitRuntimeCall(RTLFn, Args);
12185 }
12186 
12187 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12188                                llvm::FunctionCallee Callee,
12189                                ArrayRef<llvm::Value *> Args) const {
12190   assert(Loc.isValid() && "Outlined function call location must be valid.");
12191   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
12192 
12193   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12194     if (Fn->doesNotThrow()) {
12195       CGF.EmitNounwindRuntimeCall(Fn, Args);
12196       return;
12197     }
12198   }
12199   CGF.EmitRuntimeCall(Callee, Args);
12200 }
12201 
/// Emits a call to the outlined function \p OutlinedFn with arguments
/// \p Args at location \p Loc.
///
/// This implementation simply forwards all of its arguments to emitCall().
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
12207 
12208 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12209   if (const auto *FD = dyn_cast<FunctionDecl>(D))
12210     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12211       HasEmittedDeclareTargetRegion = true;
12212 }
12213 
/// Returns the address of the parameter \p NativeParam as a local variable
/// of the function being emitted by \p CGF.
///
/// \p TargetParam is not used by this implementation.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
12219 
12220 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
12221                                                    const VarDecl *VD) {
12222   if (!VD)
12223     return Address::invalid();
12224   Address UntiedAddr = Address::invalid();
12225   Address UntiedRealAddr = Address::invalid();
12226   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12227   if (It != FunctionToUntiedTaskStackMap.end()) {
12228     const UntiedLocalVarsAddressesMap &UntiedData =
12229         UntiedLocalVarsStack[It->second];
12230     auto I = UntiedData.find(VD);
12231     if (I != UntiedData.end()) {
12232       UntiedAddr = I->second.first;
12233       UntiedRealAddr = I->second.second;
12234     }
12235   }
12236   const VarDecl *CVD = VD->getCanonicalDecl();
12237   if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
12238     // Use the default allocation.
12239     if (!isAllocatableDecl(VD))
12240       return UntiedAddr;
12241     llvm::Value *Size;
12242     CharUnits Align = CGM.getContext().getDeclAlign(CVD);
12243     if (CVD->getType()->isVariablyModifiedType()) {
12244       Size = CGF.getTypeSize(CVD->getType());
12245       // Align the size: ((size + align - 1) / align) * align
12246       Size = CGF.Builder.CreateNUWAdd(
12247           Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
12248       Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
12249       Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
12250     } else {
12251       CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
12252       Size = CGM.getSize(Sz.alignTo(Align));
12253     }
12254     llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
12255     const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
12256     assert(AA->getAllocator() &&
12257            "Expected allocator expression for non-default allocator.");
12258     llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
12259     // According to the standard, the original allocator type is a enum
12260     // (integer). Convert to pointer type, if required.
12261     Allocator = CGF.EmitScalarConversion(
12262         Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
12263         AA->getAllocator()->getExprLoc());
12264     llvm::Value *Args[] = {ThreadID, Size, Allocator};
12265 
12266     llvm::Value *Addr =
12267         CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
12268                                 CGM.getModule(), OMPRTL___kmpc_alloc),
12269                             Args, getName({CVD->getName(), ".void.addr"}));
12270     llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12271         CGM.getModule(), OMPRTL___kmpc_free);
12272     QualType Ty = CGM.getContext().getPointerType(CVD->getType());
12273     Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12274         Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
12275     if (UntiedAddr.isValid())
12276       CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
12277 
12278     // Cleanup action for allocate support.
12279     class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
12280       llvm::FunctionCallee RTLFn;
12281       SourceLocation::UIntTy LocEncoding;
12282       Address Addr;
12283       const Expr *Allocator;
12284 
12285     public:
12286       OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
12287                            SourceLocation::UIntTy LocEncoding, Address Addr,
12288                            const Expr *Allocator)
12289           : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
12290             Allocator(Allocator) {}
12291       void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12292         if (!CGF.HaveInsertPoint())
12293           return;
12294         llvm::Value *Args[3];
12295         Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
12296             CGF, SourceLocation::getFromRawEncoding(LocEncoding));
12297         Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12298             Addr.getPointer(), CGF.VoidPtrTy);
12299         llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
12300         // According to the standard, the original allocator type is a enum
12301         // (integer). Convert to pointer type, if required.
12302         AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
12303                                             CGF.getContext().VoidPtrTy,
12304                                             Allocator->getExprLoc());
12305         Args[2] = AllocVal;
12306 
12307         CGF.EmitRuntimeCall(RTLFn, Args);
12308       }
12309     };
12310     Address VDAddr =
12311         UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
12312     CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
12313         NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
12314         VDAddr, AA->getAllocator());
12315     if (UntiedRealAddr.isValid())
12316       if (auto *Region =
12317               dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
12318         Region->emitUntiedSwitch(CGF);
12319     return VDAddr;
12320   }
12321   return UntiedAddr;
12322 }
12323 
12324 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12325                                              const VarDecl *VD) const {
12326   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12327   if (It == FunctionToUntiedTaskStackMap.end())
12328     return false;
12329   return UntiedLocalVarsStack[It->second].count(VD) > 0;
12330 }
12331 
12332 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12333     CodeGenModule &CGM, const OMPLoopDirective &S)
12334     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12335   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12336   if (!NeedToPush)
12337     return;
12338   NontemporalDeclsSet &DS =
12339       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12340   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12341     for (const Stmt *Ref : C->private_refs()) {
12342       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12343       const ValueDecl *VD;
12344       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12345         VD = DRE->getDecl();
12346       } else {
12347         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12348         assert((ME->isImplicitCXXThis() ||
12349                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12350                "Expected member of current class.");
12351         VD = ME->getMemberDecl();
12352       }
12353       DS.insert(VD);
12354     }
12355   }
12356 }
12357 
12358 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12359   if (!NeedToPush)
12360     return;
12361   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12362 }
12363 
12364 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12365     CodeGenFunction &CGF,
12366     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12367                           std::pair<Address, Address>> &LocalVars)
12368     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12369   if (!NeedToPush)
12370     return;
12371   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12372       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12373   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12374 }
12375 
12376 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12377   if (!NeedToPush)
12378     return;
12379   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12380 }
12381 
12382 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12383   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12384 
12385   return llvm::any_of(
12386       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12387       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
12388 }
12389 
12390 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12391     const OMPExecutableDirective &S,
12392     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12393     const {
12394   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12395   // Vars in target/task regions must be excluded completely.
12396   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12397       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12398     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12399     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12400     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12401     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12402       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12403         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12404     }
12405   }
12406   // Exclude vars in private clauses.
12407   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12408     for (const Expr *Ref : C->varlists()) {
12409       if (!Ref->getType()->isScalarType())
12410         continue;
12411       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12412       if (!DRE)
12413         continue;
12414       NeedToCheckForLPCs.insert(DRE->getDecl());
12415     }
12416   }
12417   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12418     for (const Expr *Ref : C->varlists()) {
12419       if (!Ref->getType()->isScalarType())
12420         continue;
12421       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12422       if (!DRE)
12423         continue;
12424       NeedToCheckForLPCs.insert(DRE->getDecl());
12425     }
12426   }
12427   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12428     for (const Expr *Ref : C->varlists()) {
12429       if (!Ref->getType()->isScalarType())
12430         continue;
12431       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12432       if (!DRE)
12433         continue;
12434       NeedToCheckForLPCs.insert(DRE->getDecl());
12435     }
12436   }
12437   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12438     for (const Expr *Ref : C->varlists()) {
12439       if (!Ref->getType()->isScalarType())
12440         continue;
12441       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12442       if (!DRE)
12443         continue;
12444       NeedToCheckForLPCs.insert(DRE->getDecl());
12445     }
12446   }
12447   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12448     for (const Expr *Ref : C->varlists()) {
12449       if (!Ref->getType()->isScalarType())
12450         continue;
12451       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12452       if (!DRE)
12453         continue;
12454       NeedToCheckForLPCs.insert(DRE->getDecl());
12455     }
12456   }
12457   for (const Decl *VD : NeedToCheckForLPCs) {
12458     for (const LastprivateConditionalData &Data :
12459          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12460       if (Data.DeclToUniqueName.count(VD) > 0) {
12461         if (!Data.Disabled)
12462           NeedToAddForLPCsAsDisabled.insert(VD);
12463         break;
12464       }
12465     }
12466   }
12467 }
12468 
12469 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12470     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12471     : CGM(CGF.CGM),
12472       Action((CGM.getLangOpts().OpenMP >= 50 &&
12473               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12474                            [](const OMPLastprivateClause *C) {
12475                              return C->getKind() ==
12476                                     OMPC_LASTPRIVATE_conditional;
12477                            }))
12478                  ? ActionToDo::PushAsLastprivateConditional
12479                  : ActionToDo::DoNotPush) {
12480   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12481   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12482     return;
12483   assert(Action == ActionToDo::PushAsLastprivateConditional &&
12484          "Expected a push action.");
12485   LastprivateConditionalData &Data =
12486       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12487   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12488     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12489       continue;
12490 
12491     for (const Expr *Ref : C->varlists()) {
12492       Data.DeclToUniqueName.insert(std::make_pair(
12493           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12494           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12495     }
12496   }
12497   Data.IVLVal = IVLVal;
12498   Data.Fn = CGF.CurFn;
12499 }
12500 
12501 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12502     CodeGenFunction &CGF, const OMPExecutableDirective &S)
12503     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12504   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12505   if (CGM.getLangOpts().OpenMP < 50)
12506     return;
12507   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12508   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12509   if (!NeedToAddForLPCsAsDisabled.empty()) {
12510     Action = ActionToDo::DisableLastprivateConditional;
12511     LastprivateConditionalData &Data =
12512         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12513     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12514       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12515     Data.Fn = CGF.CurFn;
12516     Data.Disabled = true;
12517   }
12518 }
12519 
/// Creates and returns a LastprivateConditionalRAII object for directive
/// \p S using the two-argument constructor (the one that takes no loop
/// counter lvalue).
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12525 
12526 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12527   if (CGM.getLangOpts().OpenMP < 50)
12528     return;
12529   if (Action == ActionToDo::DisableLastprivateConditional) {
12530     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12531            "Expected list of disabled private vars.");
12532     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12533   }
12534   if (Action == ActionToDo::PushAsLastprivateConditional) {
12535     assert(
12536         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12537         "Expected list of lastprivate conditional vars.");
12538     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12539   }
12540 }
12541 
12542 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12543                                                         const VarDecl *VD) {
12544   ASTContext &C = CGM.getContext();
12545   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
12546   if (I == LastprivateConditionalToTypes.end())
12547     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12548   QualType NewType;
12549   const FieldDecl *VDField;
12550   const FieldDecl *FiredField;
12551   LValue BaseLVal;
12552   auto VI = I->getSecond().find(VD);
12553   if (VI == I->getSecond().end()) {
12554     RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
12555     RD->startDefinition();
12556     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12557     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12558     RD->completeDefinition();
12559     NewType = C.getRecordType(RD);
12560     Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12561     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12562     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12563   } else {
12564     NewType = std::get<0>(VI->getSecond());
12565     VDField = std::get<1>(VI->getSecond());
12566     FiredField = std::get<2>(VI->getSecond());
12567     BaseLVal = std::get<3>(VI->getSecond());
12568   }
12569   LValue FiredLVal =
12570       CGF.EmitLValueForField(BaseLVal, FiredField);
12571   CGF.EmitStoreOfScalar(
12572       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12573       FiredLVal);
12574   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
12575 }
12576 
12577 namespace {
12578 /// Checks if the lastprivate conditional variable is referenced in LHS.
12579 class LastprivateConditionalRefChecker final
12580     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12581   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12582   const Expr *FoundE = nullptr;
12583   const Decl *FoundD = nullptr;
12584   StringRef UniqueDeclName;
12585   LValue IVLVal;
12586   llvm::Function *FoundFn = nullptr;
12587   SourceLocation Loc;
12588 
12589 public:
12590   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12591     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12592          llvm::reverse(LPM)) {
12593       auto It = D.DeclToUniqueName.find(E->getDecl());
12594       if (It == D.DeclToUniqueName.end())
12595         continue;
12596       if (D.Disabled)
12597         return false;
12598       FoundE = E;
12599       FoundD = E->getDecl()->getCanonicalDecl();
12600       UniqueDeclName = It->second;
12601       IVLVal = D.IVLVal;
12602       FoundFn = D.Fn;
12603       break;
12604     }
12605     return FoundE == E;
12606   }
12607   bool VisitMemberExpr(const MemberExpr *E) {
12608     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12609       return false;
12610     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12611          llvm::reverse(LPM)) {
12612       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12613       if (It == D.DeclToUniqueName.end())
12614         continue;
12615       if (D.Disabled)
12616         return false;
12617       FoundE = E;
12618       FoundD = E->getMemberDecl()->getCanonicalDecl();
12619       UniqueDeclName = It->second;
12620       IVLVal = D.IVLVal;
12621       FoundFn = D.Fn;
12622       break;
12623     }
12624     return FoundE == E;
12625   }
12626   bool VisitStmt(const Stmt *S) {
12627     for (const Stmt *Child : S->children()) {
12628       if (!Child)
12629         continue;
12630       if (const auto *E = dyn_cast<Expr>(Child))
12631         if (!E->isGLValue())
12632           continue;
12633       if (Visit(Child))
12634         return true;
12635     }
12636     return false;
12637   }
12638   explicit LastprivateConditionalRefChecker(
12639       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12640       : LPM(LPM) {}
12641   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12642   getFoundData() const {
12643     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12644   }
12645 };
12646 } // namespace
12647 
12648 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
12649                                                        LValue IVLVal,
12650                                                        StringRef UniqueDeclName,
12651                                                        LValue LVal,
12652                                                        SourceLocation Loc) {
12653   // Last updated loop counter for the lastprivate conditional var.
12654   // int<xx> last_iv = 0;
12655   llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
12656   llvm::Constant *LastIV =
12657       getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
12658   cast<llvm::GlobalVariable>(LastIV)->setAlignment(
12659       IVLVal.getAlignment().getAsAlign());
12660   LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
12661 
12662   // Last value of the lastprivate conditional.
12663   // decltype(priv_a) last_a;
12664   llvm::Constant *Last = getOrCreateInternalVariable(
12665       CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
12666   cast<llvm::GlobalVariable>(Last)->setAlignment(
12667       LVal.getAlignment().getAsAlign());
12668   LValue LastLVal =
12669       CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());
12670 
12671   // Global loop counter. Required to handle inner parallel-for regions.
12672   // iv
12673   llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
12674 
12675   // #pragma omp critical(a)
12676   // if (last_iv <= iv) {
12677   //   last_iv = iv;
12678   //   last_a = priv_a;
12679   // }
12680   auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
12681                     Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
12682     Action.Enter(CGF);
12683     llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
12684     // (last_iv <= iv) ? Check if the variable is updated and store new
12685     // value in global var.
12686     llvm::Value *CmpRes;
12687     if (IVLVal.getType()->isSignedIntegerType()) {
12688       CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
12689     } else {
12690       assert(IVLVal.getType()->isUnsignedIntegerType() &&
12691              "Loop iteration variable must be integer.");
12692       CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
12693     }
12694     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
12695     llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
12696     CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
12697     // {
12698     CGF.EmitBlock(ThenBB);
12699 
12700     //   last_iv = iv;
12701     CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
12702 
12703     //   last_a = priv_a;
12704     switch (CGF.getEvaluationKind(LVal.getType())) {
12705     case TEK_Scalar: {
12706       llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
12707       CGF.EmitStoreOfScalar(PrivVal, LastLVal);
12708       break;
12709     }
12710     case TEK_Complex: {
12711       CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
12712       CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
12713       break;
12714     }
12715     case TEK_Aggregate:
12716       llvm_unreachable(
12717           "Aggregates are not supported in lastprivate conditional.");
12718     }
12719     // }
12720     CGF.EmitBranch(ExitBB);
12721     // There is no need to emit line number for unconditional branch.
12722     (void)ApplyDebugLocation::CreateEmpty(CGF);
12723     CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
12724   };
12725 
12726   if (CGM.getLangOpts().OpenMPSimd) {
12727     // Do not emit as a critical region as no parallel region could be emitted.
12728     RegionCodeGenTy ThenRCG(CodeGen);
12729     ThenRCG(CGF);
12730   } else {
12731     emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
12732   }
12733 }
12734 
12735 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12736                                                          const Expr *LHS) {
12737   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12738     return;
12739   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12740   if (!Checker.Visit(LHS))
12741     return;
12742   const Expr *FoundE;
12743   const Decl *FoundD;
12744   StringRef UniqueDeclName;
12745   LValue IVLVal;
12746   llvm::Function *FoundFn;
12747   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12748       Checker.getFoundData();
12749   if (FoundFn != CGF.CurFn) {
12750     // Special codegen for inner parallel regions.
12751     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12752     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12753     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12754            "Lastprivate conditional is not found in outer region.");
12755     QualType StructTy = std::get<0>(It->getSecond());
12756     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12757     LValue PrivLVal = CGF.EmitLValue(FoundE);
12758     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12759         PrivLVal.getAddress(CGF),
12760         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
12761     LValue BaseLVal =
12762         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12763     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12764     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12765                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12766                         FiredLVal, llvm::AtomicOrdering::Unordered,
12767                         /*IsVolatile=*/true, /*isInit=*/false);
12768     return;
12769   }
12770 
12771   // Private address of the lastprivate conditional in the current context.
12772   // priv_a
12773   LValue LVal = CGF.EmitLValue(FoundE);
12774   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12775                                    FoundE->getExprLoc());
12776 }
12777 
12778 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12779     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12780     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12781   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12782     return;
12783   auto Range = llvm::reverse(LastprivateConditionalStack);
12784   auto It = llvm::find_if(
12785       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12786   if (It == Range.end() || It->Fn != CGF.CurFn)
12787     return;
12788   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12789   assert(LPCI != LastprivateConditionalToTypes.end() &&
12790          "Lastprivates must be registered already.");
12791   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12792   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12793   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12794   for (const auto &Pair : It->DeclToUniqueName) {
12795     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12796     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
12797       continue;
12798     auto I = LPCI->getSecond().find(Pair.first);
12799     assert(I != LPCI->getSecond().end() &&
12800            "Lastprivate must be rehistered already.");
12801     // bool Cmp = priv_a.Fired != 0;
12802     LValue BaseLVal = std::get<3>(I->getSecond());
12803     LValue FiredLVal =
12804         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12805     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12806     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12807     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12808     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12809     // if (Cmp) {
12810     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12811     CGF.EmitBlock(ThenBB);
12812     Address Addr = CGF.GetAddrOfLocalVar(VD);
12813     LValue LVal;
12814     if (VD->getType()->isReferenceType())
12815       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12816                                            AlignmentSource::Decl);
12817     else
12818       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12819                                 AlignmentSource::Decl);
12820     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12821                                      D.getBeginLoc());
12822     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12823     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12824     // }
12825   }
12826 }
12827 
12828 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12829     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12830     SourceLocation Loc) {
12831   if (CGF.getLangOpts().OpenMP < 50)
12832     return;
12833   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12834   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12835          "Unknown lastprivate conditional variable.");
12836   StringRef UniqueName = It->second;
12837   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12838   // The variable was not updated in the region - exit.
12839   if (!GV)
12840     return;
12841   LValue LPLVal = CGF.MakeAddrLValue(
12842       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
12843   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12844   CGF.EmitStoreOfScalar(Res, PrivLVal);
12845 }
12846 
/// Not supported by the SIMD-only runtime: any call to
/// emitParallelOutlinedFunction reaches llvm_unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12852 
/// Not supported by the SIMD-only runtime: any call to
/// emitTeamsOutlinedFunction reaches llvm_unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12858 
/// Not supported by the SIMD-only runtime: any call to
/// emitTaskOutlinedFunction reaches llvm_unreachable. The NumberOfParts
/// out-parameter is never written.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12866 
/// Not supported by the SIMD-only runtime: any call to emitParallelCall
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12874 
/// Not supported by the SIMD-only runtime: any call to emitCriticalRegion
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12881 
/// Not supported by the SIMD-only runtime: any call to emitMasterRegion
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12887 
/// Not supported by the SIMD-only runtime: any call to emitMaskedRegion
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12894 
/// Not supported by the SIMD-only runtime: any call to emitTaskyieldCall
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12899 
/// Not supported by the SIMD-only runtime: any call to emitTaskgroupRegion
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12905 
/// Not supported by the SIMD-only runtime: any call to emitSingleRegion
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12913 
/// Not supported by the SIMD-only runtime: any call to emitOrderedRegion
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12920 
/// Not supported by the SIMD-only runtime: any call to emitBarrierCall
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12928 
/// Not supported by the SIMD-only runtime: any call to emitForDispatchInit
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12935 
/// Not supported by the SIMD-only runtime: any call to emitForStaticInit
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12941 
/// Not supported by the SIMD-only runtime: any call to
/// emitDistributeStaticInit reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12947 
/// Not supported by the SIMD-only runtime: any call to
/// emitForOrderedIterationEnd reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12954 
/// Not supported by the SIMD-only runtime: any call to emitForStaticFinish
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12960 
/// Not supported by the SIMD-only runtime: any call to emitForNext reaches
/// llvm_unreachable, so no value is ever returned.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12968 
/// Not supported by the SIMD-only runtime: any call to emitNumThreadsClause
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12974 
/// Not supported by the SIMD-only runtime: any call to emitProcBindClause
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12980 
/// Not supported by the SIMD-only runtime: any call to getAddrOfThreadPrivate
/// reaches llvm_unreachable, so no address is ever returned.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12987 
/// Not supported by the SIMD-only runtime: any call to
/// emitThreadPrivateVarDefinition reaches llvm_unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12993 
/// Not supported by the SIMD-only runtime: any call to
/// getAddrOfArtificialThreadPrivate reaches llvm_unreachable.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12998 
/// Not supported by the SIMD-only runtime: any call to emitFlush reaches
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13005 
/// Not supported by the SIMD-only runtime: any call to emitTaskCall reaches
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13014 
/// Not supported by the SIMD-only runtime: any call to emitTaskLoopCall
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13021 
/// Emits a reduction in SIMD-only mode by forwarding all arguments unchanged
/// to CGOpenMPRuntime::emitReduction. \a Options.SimpleReduction is asserted
/// to be set before forwarding.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
13030 
/// Not supported by the SIMD-only runtime: any call to emitTaskReductionInit
/// reaches llvm_unreachable, so no value is ever returned.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13036 
/// Not supported by the SIMD-only runtime: any call to emitTaskReductionFini
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13042 
/// Not supported by the SIMD-only runtime: any call to
/// emitTaskReductionFixups reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13049 
/// Not supported by the SIMD-only runtime: any call to getTaskReductionItem
/// reaches llvm_unreachable, so no address is ever returned.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13056 
/// Not supported by the SIMD-only runtime: any call to emitTaskwaitCall
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13061 
/// Not supported by the SIMD-only runtime: any call to
/// emitCancellationPointCall reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13067 
/// Not supported by the SIMD-only runtime: any call to emitCancelCall reaches
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13073 
/// Not supported by the SIMD-only runtime: any call to
/// emitTargetOutlinedFunction reaches llvm_unreachable. The OutlinedFn and
/// OutlinedFnID out-parameters are never written.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13080 
/// Not supported by the SIMD-only runtime: any call to emitTargetCall reaches
/// llvm_unreachable. The SizeEmitter callback is never invoked.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13090 
/// Not supported by the SIMD-only runtime: any call to emitTargetFunctions
/// reaches llvm_unreachable, so no result is ever returned.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13094 
/// Not supported by the SIMD-only runtime: any call to
/// emitTargetGlobalVariable reaches llvm_unreachable.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13098 
/// Always returns false in SIMD-only mode; \a GD is not inspected. Unlike the
/// neighbouring target hooks this one is callable and does not hit
/// llvm_unreachable.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
13102 
/// Not supported by the SIMD-only runtime: any call to emitTeamsCall reaches
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13110 
/// Not supported by the SIMD-only runtime: any call to emitNumTeamsClause
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13117 
/// Not supported by the SIMD-only runtime: any call to emitTargetDataCalls
/// reaches llvm_unreachable. \a Info is never modified.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13123 
/// Not supported by the SIMD-only runtime: any call to
/// emitTargetDataStandAloneCall reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13129 
/// Not supported by the SIMD-only runtime: any call to emitDoacrossInit
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13135 
/// Not supported by the SIMD-only runtime: any call to emitDoacrossOrdered
/// reaches llvm_unreachable.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13140 
/// Not supported by the SIMD-only runtime: any call to translateParameter
/// reaches llvm_unreachable, so no declaration is ever returned.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13146 
/// Not supported by the SIMD-only runtime: any call to getParameterAddress
/// reaches llvm_unreachable, so no address is ever returned.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13153