1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Basic/BitmaskEnum.h"
24 #include "clang/Basic/FileManager.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/CodeGen/ConstantInitBuilder.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/SetOperations.h"
30 #include "llvm/ADT/StringExtras.h"
31 #include "llvm/Bitcode/BitcodeReader.h"
32 #include "llvm/IR/Constants.h"
33 #include "llvm/IR/DerivedTypes.h"
34 #include "llvm/IR/GlobalValue.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/Support/AtomicOrdering.h"
37 #include "llvm/Support/Format.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include <cassert>
40 #include <numeric>
41 
42 using namespace clang;
43 using namespace CodeGen;
44 using namespace llvm::omp;
45 
46 namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Build region info for a region that outlines the captured statement
  /// \p CS.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Build region info without an associated captured statement (used for
  /// inlined regions).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit the next dispatch point for an untied task. No-op by default;
  /// overridden for task regions.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Return the kind of this OpenMP region.
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// Return the OpenMP directive kind this region was created for.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// Return true if the region was created with the HasCancel flag set
  /// (i.e. the construct may be cancelled).
  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Kind of the region (parallel/task/inlined/target).
  CGOpenMPRegionKind RegionKind;
  /// Code generation sequence emitted as the region body.
  RegionCodeGenTy CodeGen;
  /// OpenMP directive kind of the region.
  OpenMPDirectiveKind Kind;
  /// Whether the construct may be cancelled.
  bool HasCancel;
};
108 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param HelperName Name to report for the outlined capture helper.
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name used for the outlined capture helper function.
  StringRef HelperName;
};
141 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Action emitting the dispatch ("switching point") logic that untied
  /// tasks use to resume at the part where they previously stopped.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True if the task is untied (note: stores the negation of the
    /// constructor's \p Tied argument).
    bool Untied;
    /// Variable holding the current part id of the task.
    const VarDecl *PartIDVar;
    /// Extra code emitted at every switching point.
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch over the part id; built lazily in Enter().
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point: load the current part id and switch on
        // it; unknown values fall through to an immediate return.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Part id 0 resumes at the very beginning of the task body.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one switching point: store the next part id, run the extra
    /// untied codegen, return from the task, and register the resume block
    /// as a new case of the dispatch switch.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (switch cases).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
230 
/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries are forwarded to the enclosing (outer) OpenMP
/// region info, if there is one.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// Return the CGCapturedStmtInfo that was active before this region was
  /// entered (restored by InlinedOpenMPRegionRAII on exit).
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to an OpenMP region info, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
313 
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Client-provided unique name of the target region.
  StringRef HelperName;
};
342 
/// Placeholder RegionCodeGenTy callback for regions whose body must never be
/// emitted; reaching it indicates a logic error.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are already local; only globals need
      // privatization.
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  // Always false: instances of this class are never matched via
  // isa/dyn_cast.
  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
405 
/// RAII for emitting code of OpenMP constructs. Installs a
/// CGOpenMPInlinedRegionInfo as the active CapturedStmtInfo and stashes the
/// function's lambda/block capture state for the duration of the region.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  /// Saved lambda capture fields of \p CGF; swapped back in the destructor.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  /// Saved lambda 'this' capture field of \p CGF.
  FieldDecl *LambdaThisCaptureField = nullptr;
  /// Saved block info of \p CGF.
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    // Stash and clear lambda/block capture info so the inlined region does
    // not resolve variables through the enclosing context's captures.
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};
442 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// The numeric values must stay in sync with the runtime's KMP_IDENT_* flags.
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
471 
472 namespace {
473 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// Flag undefined (no 'requires' information recorded yet).
  OMP_REQ_UNDEFINED               = 0x000,
  /// No requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};
490 
/// Reserved device IDs with special meaning to the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
496 } // anonymous namespace
497 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
538 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h). The numeric values must stay in sync with
/// the OpenMP runtime.
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// The default schedule is static.
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
570 
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region. Runs the Exit() part of the action as an EH-scope cleanup.
class CleanupTy final : public EHScopeStack::Cleanup {
  /// Action whose Exit() callback is emitted as the cleanup. Not owned.
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  /// Emit the exit part of the action, but only if there is a valid
  /// insertion point to emit into.
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};
584 
585 } // anonymous namespace
586 
587 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
588   CodeGenFunction::RunCleanupsScope Scope(CGF);
589   if (PrePostAction) {
590     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
591     Callback(CodeGen, CGF, *PrePostAction);
592   } else {
593     PrePostActionTy Action;
594     Callback(CodeGen, CGF, Action);
595   }
596 }
597 
598 /// Check if the combiner is a call to UDR combiner and if it is so return the
599 /// UDR decl used for reduction.
600 static const OMPDeclareReductionDecl *
601 getReductionInit(const Expr *ReductionOp) {
602   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
603     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
604       if (const auto *DRE =
605               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
606         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
607           return DRD;
608   return nullptr;
609 }
610 
/// Emit initialization of the reduction private copy \p Private from the
/// original variable \p Original.
/// \param DRD Declare reduction construct for this reduction item.
/// \param InitOp Call to the UDR initializer; only emitted when \p DRD has an
/// explicit initializer.
/// \param Private Address of the private copy being initialized.
/// \param Original Address of the original (shared) variable.
/// \param Ty Type of the reduction item.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // Explicit UDR initializer: emit the initializer call with its two
    // placeholder variables remapped to the private and original addresses.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Substitute the initializer function for the opaque callee and emit the
    // call for its side effects only.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No explicit initializer: copy the null value of \p Ty, materialized as
    // a private constant global, into the private copy.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    // Load the null value using the evaluation strategy matching \p Ty.
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress(CGF));
      break;
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
662 
/// Emit element-by-element initialization of arrays of complex types.
/// \param CGF CodeGenFunction performing the emission.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, initialize each element via the
/// declare-reduction initializer (using \p DRD and the original array);
/// otherwise emit \p Init as an ordinary initializer expression.
/// \param Init Initial expression of array.
/// \param DRD Declare reduction construct used for reduction, if any.
/// \param SrcAddr Address of the original array (only used when \p DRD is
/// non-null).
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for empty arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI over the source element pointer, only needed when the initializer
  // reads from the original array (declare-reduction case).
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Scope the per-element cleanups so they run after each element init.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
751 
752 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
753   return CGF.EmitOMPSharedLValue(E);
754 }
755 
756 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
757                                             const Expr *E) {
758   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
759     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
760   return LValue();
761 }
762 
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Use the declare-reduction initializer when the DRD either provides one
  // explicitly or the private copy has no initializer of its own; otherwise
  // fall back to the private variable's initializer expression.
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress(CGF));
}
779 
780 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
781                                    ArrayRef<const Expr *> Origs,
782                                    ArrayRef<const Expr *> Privates,
783                                    ArrayRef<const Expr *> ReductionOps) {
784   ClausesData.reserve(Shareds.size());
785   SharedAddresses.reserve(Shareds.size());
786   Sizes.reserve(Shareds.size());
787   BaseDecls.reserve(Shareds.size());
788   const auto *IOrig = Origs.begin();
789   const auto *IPriv = Privates.begin();
790   const auto *IRed = ReductionOps.begin();
791   for (const Expr *Ref : Shareds) {
792     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
793     std::advance(IOrig, 1);
794     std::advance(IPriv, 1);
795     std::advance(IRed, 1);
796   }
797 }
798 
799 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
800   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
801          "Number of generated lvalues must be exactly N.");
802   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
803   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
804   SharedAddresses.emplace_back(First, Second);
805   if (ClausesData[N].Shared == ClausesData[N].Ref) {
806     OrigAddresses.emplace_back(First, Second);
807   } else {
808     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
809     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
810     OrigAddresses.emplace_back(First, Second);
811   }
812 }
813 
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  // Compute and record the size of the N-th reduction item (bytes, and the
  // element count for variably modified types), then, for VLAs, emit the
  // private type with the computed size bound.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-sized item: only the byte size is recorded; no element count.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = ptrdiff(UB, LB) + 1 from the section's bound pointers;
    // byte size = count * sizeof(element).
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole variably modified item: byte size comes from the type; the
    // element count is recovered by exact division.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed element count so that
  // EmitVariablyModifiedType can materialize the private type's bounds.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
850 
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  // Re-emit the variably modified private type of the N-th item using an
  // externally supplied element count instead of recomputing it.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    // Non-VLA items never carry a dynamic size.
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Map the VLA size expression to Size while emitting the type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
869 
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  // Emit the initializer for the N-th private reduction copy, choosing among
  // aggregate initialization, a user-defined reduction initializer, or the
  // private variable's own initializer expression.
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Retype both the private and the shared addresses to the memory
  // representations expected by the initializer emission below.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array items are initialized element-by-element.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar item governed by a user-defined reduction initializer.
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private copy's own non-trivial initializer, unless
    // DefaultInit already handled the initialization (returned true).
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
903 
904 bool ReductionCodeGen::needCleanups(unsigned N) {
905   const auto *PrivateVD =
906       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
907   QualType PrivateType = PrivateVD->getType();
908   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
909   return DTorKind != QualType::DK_none;
910 }
911 
912 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
913                                     Address PrivateAddr) {
914   const auto *PrivateVD =
915       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
916   QualType PrivateType = PrivateVD->getType();
917   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
918   if (needCleanups(N)) {
919     PrivateAddr = CGF.Builder.CreateElementBitCast(
920         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
921     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
922   }
923 }
924 
// Follow pointer/reference indirections from BaseLV until the pointee type
// matches ElTy, then return an lvalue for that address retyped to ElTy's
// memory representation (base info and TBAA are preserved from BaseLV).
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // Load through one level of indirection per iteration.
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
944 
// Rebuild the chain of indirections described by BaseTy on top of Addr: one
// memory temporary is created per pointer/reference level, each outer
// temporary stores the address of the next inner one, and Addr is stored in
// the innermost. Returns the outermost temporary, or Addr itself (cast to
// BaseLVType, at BaseLVAlignment) when BaseTy has no extra indirection.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp; // Remember the outermost temporary to return.
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  // Cast Addr to the innermost expected pointer type (or to BaseLVType when
  // no temporaries were created).
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
972 
973 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
974   const VarDecl *OrigVD = nullptr;
975   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
976     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
977     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
978       Base = TempOASE->getBase()->IgnoreParenImpCasts();
979     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
980       Base = TempASE->getBase()->IgnoreParenImpCasts();
981     DE = cast<DeclRefExpr>(Base);
982     OrigVD = cast<VarDecl>(DE->getDecl());
983   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
984     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
985     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
986       Base = TempASE->getBase()->IgnoreParenImpCasts();
987     DE = cast<DeclRefExpr>(Base);
988     OrigVD = cast<VarDecl>(DE->getDecl());
989   }
990   return OrigVD;
991 }
992 
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  // For array-section/subscript reduction items, offset the private address
  // by the distance between the shared item and its base variable so that
  // indexing with the original base expression lands in the private copy.
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Distance (in elements) from the shared item back to the base.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    // Recreate the base's pointer indirections on top of the adjusted
    // private pointer.
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  // Not section/subscript based: the private address is used unchanged.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1018 
1019 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1020   const OMPDeclareReductionDecl *DRD =
1021       getReductionInit(ClausesData[N].ReductionOp);
1022   return DRD && DRD->getInitializer();
1023 }
1024 
1025 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1026   return CGF.EmitLoadOfPointerLValue(
1027       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1028       getThreadIDVariable()->getType()->castAs<PointerType>());
1029 }
1030 
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  // Run the region's CodeGen callback inside a terminate scope so that an
  // exception escaping the structured block terminates rather than unwinding
  // out of the region.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1043 
1044 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1045     CodeGenFunction &CGF) {
1046   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1047                             getThreadIDVariable()->getType(),
1048                             AlignmentSource::Decl);
1049 }
1050 
1051 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1052                                        QualType FieldTy) {
1053   auto *Field = FieldDecl::Create(
1054       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1055       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1056       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1057   Field->setAccess(AS_public);
1058   DC->addDecl(Field);
1059   return Field;
1060 }
1061 
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // kmp_critical_name is modeled as an array of 8 i32s.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  // Pull in any offload entry info recorded in the host IR file's metadata.
  loadOffloadInfoMetadata();
}
1072 
1073 void CGOpenMPRuntime::clear() {
1074   InternalVars.clear();
1075   // Clean non-target variable declarations possibly used only in debug info.
1076   for (const auto &Data : EmittedNonTargetVariables) {
1077     if (!Data.getValue().pointsToAliveValue())
1078       continue;
1079     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1080     if (!GV)
1081       continue;
1082     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1083       continue;
1084     GV->eraseFromParent();
1085   }
1086 }
1087 
1088 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1089   SmallString<128> Buffer;
1090   llvm::raw_svector_ostream OS(Buffer);
1091   StringRef Sep = FirstSeparator;
1092   for (StringRef Part : Parts) {
1093     OS << Sep << Part;
1094     Sep = Separator;
1095   }
1096   return std::string(OS.str());
1097 }
1098 
// Emit the outlined combiner or initializer function for a declare-reduction
// directive: void .omp_combiner.(Ty *omp_out, Ty *omp_in) (or
// .omp_initializer. with orig/priv). The In/Out VarDecls are remapped onto
// the pointer parameters, and CombinerInitializer (if any) is emitted as the
// body expression.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // Small helper: prefer always-inlining it under optimization.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For initializers, emit omp_priv's own initializer first (if non-trivial).
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1155 
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  // Emit (once per decl) the combiner and optional initializer functions for
  // a user-defined reduction, caching them in UDRMap. If emitted while a
  // function is being generated, record D against that function as well.
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // For direct initialization the init expression is handled via omp_priv's
    // own initializer inside the emitted function, so pass nullptr here.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1181 
1182 std::pair<llvm::Function *, llvm::Function *>
1183 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1184   auto I = UDRMap.find(D);
1185   if (I != UDRMap.end())
1186     return I->second;
1187   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1188   return UDRMap.lookup(D);
1189 }
1190 
1191 namespace {
1192 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1193 // Builder if one is present.
struct PushAndPopStackRAII {
  // On construction, push a finalization callback for an OMPD_parallel region
  // onto the OpenMPIRBuilder (if one is in use); pop it on destruction.
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      // Restore the builder to IP, then branch through Clang's cleanups to
      // the parallel region's cancellation destination.
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI(
        {FiniCB, OMPD_parallel, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  // Null when no OpenMPIRBuilder is in use; then both push and pop are no-ops.
  llvm::OpenMPIRBuilder *OMPBuilder;
};
1235 } // namespace
1236 
// Outline the captured statement CS of a parallel/teams-style directive D
// into a function named via OutlinedHelperName, wiring up the region info
// (thread-id variable, innermost kind, cancellation) for codegen.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Determine whether this particular directive kind may contain a 'cancel'
  // construct; hasCancel() is per-directive-class, hence the dyn_cast chain.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}
1273 
1274 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1275     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1276     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1277   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1278   return emitParallelOrTeamsOutlinedFunction(
1279       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1280 }
1281 
1282 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1283     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1284     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1285   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1286   return emitParallelOrTeamsOutlinedFunction(
1287       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1288 }
1289 
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Untied tasks re-enqueue themselves: this callback emits a call to
  // __kmpc_omp_task with the task descriptor loaded from TaskTVar.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Taskloop-class directives capture under OMPD_taskloop, plain tasks under
  // OMPD_task.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // hasCancel() is per-directive-class, hence the dyn_cast chain.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // Only untied tasks are split into multiple parts.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1336 
1337 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1338                              const RecordDecl *RD, const CGRecordLayout &RL,
1339                              ArrayRef<llvm::Constant *> Data) {
1340   llvm::StructType *StructTy = RL.getLLVMType();
1341   unsigned PrevIdx = 0;
1342   ConstantInitBuilder CIBuilder(CGM);
1343   auto DI = Data.begin();
1344   for (const FieldDecl *FD : RD->fields()) {
1345     unsigned Idx = RL.getLLVMFieldNo(FD);
1346     // Fill the alignment.
1347     for (unsigned I = PrevIdx; I < Idx; ++I)
1348       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1349     PrevIdx = Idx + 1;
1350     Fields.add(*DI);
1351     ++DI;
1352   }
1353 }
1354 
// Create a global variable of record type Ty whose initializer is built from
// Data (with null padding for layout gaps); trailing arguments are forwarded
// to ConstantStructBuilder::finishAndCreateGlobal (e.g. linkage, name suffix).
template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}
1369 
// Build a constant struct of record type Ty from Data (with null padding for
// layout gaps) and append it to Parent, an aggregate builder such as a
// ConstantArrayBuilder or ConstantStructBuilder.
template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}
1381 
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  // Record a per-function service insertion point for thread-id/location
  // setup code, either at the current builder position or just after the
  // function's alloca insertion point.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  // A no-op bitcast of undef serves as a removable placeholder instruction
  // marking the insertion point.
  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}
1397 
1398 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1399   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1400   if (Elem.second.ServiceInsertPt) {
1401     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1402     Elem.second.ServiceInsertPt = nullptr;
1403     Ptr->eraseFromParent();
1404   }
1405 }
1406 
// Render Loc into Buffer using the ";file;function;line;column;;" layout of
// the runtime's ident string and return a view over the appended text.
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  // The function-name field is left empty when there is no current FunctionDecl.
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
1419 
1420 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1421                                                  SourceLocation Loc,
1422                                                  unsigned Flags) {
1423   llvm::Constant *SrcLocStr;
1424   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1425       Loc.isInvalid()) {
1426     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
1427   } else {
1428     std::string FunctionName = "";
1429     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1430       FunctionName = FD->getQualifiedNameAsString();
1431     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1432     const char *FileName = PLoc.getFilename();
1433     unsigned Line = PLoc.getLine();
1434     unsigned Column = PLoc.getColumn();
1435     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
1436                                                 Line, Column);
1437   }
1438   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1439   return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
1440                                      Reserved2Flags);
1441 }
1442 
/// Return the OpenMP thread id for the current function, caching it where
/// possible: either read it from an outlined region's thread-id argument or
/// emit a __kmpc_global_thread_num call at the service insertion point.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only load from the argument when the load cannot be skipped by
      // exceptional control flow: either no landing pads are required, or the
      // load is emitted in (or its pointer lives in) the entry block or the
      // current block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insertion point so it dominates all uses,
  // then restore the builder's previous position.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1510 
1511 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1512   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1513   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1514     clearLocThreadIdInsertPt(CGF);
1515     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1516   }
1517   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1518     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1519       UDRMap.erase(D);
1520     FunctionUDRMap.erase(CGF.CurFn);
1521   }
1522   auto I = FunctionUDMMap.find(CGF.CurFn);
1523   if (I != FunctionUDMMap.end()) {
1524     for(const auto *D : I->second)
1525       UDMMap.erase(D);
1526     FunctionUDMMap.erase(I);
1527   }
1528   LastprivateConditionalToTypes.erase(CGF.CurFn);
1529 }
1530 
/// Return the ident_t* type as maintained by the OpenMPIRBuilder.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}
1534 
1535 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1536   if (!Kmpc_MicroTy) {
1537     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1538     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1539                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1540     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1541   }
1542   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1543 }
1544 
1545 llvm::FunctionCallee
1546 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1547   assert((IVSize == 32 || IVSize == 64) &&
1548          "IV size is not compatible with the omp runtime");
1549   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1550                                             : "__kmpc_for_static_init_4u")
1551                                 : (IVSigned ? "__kmpc_for_static_init_8"
1552                                             : "__kmpc_for_static_init_8u");
1553   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1554   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1555   llvm::Type *TypeParams[] = {
1556     getIdentTyPointerTy(),                     // loc
1557     CGM.Int32Ty,                               // tid
1558     CGM.Int32Ty,                               // schedtype
1559     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1560     PtrTy,                                     // p_lower
1561     PtrTy,                                     // p_upper
1562     PtrTy,                                     // p_stride
1563     ITy,                                       // incr
1564     ITy                                        // chunk
1565   };
1566   auto *FnTy =
1567       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1568   return CGM.CreateRuntimeFunction(FnTy, Name);
1569 }
1570 
1571 llvm::FunctionCallee
1572 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1573   assert((IVSize == 32 || IVSize == 64) &&
1574          "IV size is not compatible with the omp runtime");
1575   StringRef Name =
1576       IVSize == 32
1577           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1578           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1579   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1580   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1581                                CGM.Int32Ty,           // tid
1582                                CGM.Int32Ty,           // schedtype
1583                                ITy,                   // lower
1584                                ITy,                   // upper
1585                                ITy,                   // stride
1586                                ITy                    // chunk
1587   };
1588   auto *FnTy =
1589       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1590   return CGM.CreateRuntimeFunction(FnTy, Name);
1591 }
1592 
1593 llvm::FunctionCallee
1594 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1595   assert((IVSize == 32 || IVSize == 64) &&
1596          "IV size is not compatible with the omp runtime");
1597   StringRef Name =
1598       IVSize == 32
1599           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1600           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1601   llvm::Type *TypeParams[] = {
1602       getIdentTyPointerTy(), // loc
1603       CGM.Int32Ty,           // tid
1604   };
1605   auto *FnTy =
1606       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1607   return CGM.CreateRuntimeFunction(FnTy, Name);
1608 }
1609 
1610 llvm::FunctionCallee
1611 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1612   assert((IVSize == 32 || IVSize == 64) &&
1613          "IV size is not compatible with the omp runtime");
1614   StringRef Name =
1615       IVSize == 32
1616           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1617           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1618   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1619   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1620   llvm::Type *TypeParams[] = {
1621     getIdentTyPointerTy(),                     // loc
1622     CGM.Int32Ty,                               // tid
1623     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1624     PtrTy,                                     // p_lower
1625     PtrTy,                                     // p_upper
1626     PtrTy                                      // p_stride
1627   };
1628   auto *FnTy =
1629       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1630   return CGM.CreateRuntimeFunction(FnTy, Name);
1631 }
1632 
/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location.
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();

  // The loc should be always valid and have a file ID (the user cannot use
  // #pragma directives in macros)

  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  // The filesystem device/inode pair gives an ID that is stable across
  // translation units including the same file; failure to stat the file is
  // diagnosed but not fatal here.
  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
    SM.getDiagnostics().Report(diag::err_cannot_open_file)
        << PLoc.getFilename() << EC.message();

  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}
1658 
/// Return the address of the reference pointer ("..._decl_tgt_ref_ptr")
/// created for a 'declare target link' variable, or for a 'declare target to'
/// variable under unified shared memory; otherwise an invalid Address.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // No runtime indirection is needed in simd-only mode.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    // Build the name of the reference pointer; internal variables get a
    // file-ID infix so the name is unique across translation units.
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    // Create the pointer global lazily and register it with the offload
    // machinery on first use.
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host the pointer is initialized to the variable's address; on
      // the device the runtime fills it in.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1697 
1698 llvm::Constant *
1699 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1700   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1701          !CGM.getContext().getTargetInfo().isTLSSupported());
1702   // Lookup the entry, lazily creating it if necessary.
1703   std::string Suffix = getName({"cache", ""});
1704   return getOrCreateInternalVariable(
1705       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1706 }
1707 
/// Return the address of this thread's copy of a threadprivate variable,
/// either the variable itself (native TLS) or the result of a
/// __kmpc_threadprivate_cached runtime call.
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  // With native TLS the variable is already thread-local; no runtime call.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  // __kmpc_threadprivate_cached(loc, tid, &var, size, &cache)
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.Int8PtrTy),
                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                         getOrCreateThreadPrivateCache(VD)};
  return Address(CGF.EmitRuntimeCall(
                     OMPBuilder.getOrCreateRuntimeFunction(
                         CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
                     Args),
                 VDAddr.getAlignment());
}
1728 
/// Emit the runtime calls that register constructor/copy-constructor/
/// destructor callbacks for a threadprivate variable.
void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}
1748 
/// Emit (for the defining declaration only) the constructor/destructor
/// helpers for a threadprivate variable and register them with the runtime.
/// When CGF is null a standalone "__omp_threadprivate_init_" function is
/// created and returned so it can run as a global initializer; otherwise the
/// registration is emitted inline into CGF and nullptr is returned.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // With native TLS no runtime registration is needed at all.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  // Only the definition triggers emission, and only once per mangled name.
  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // The ctor receives the address of the thread's copy as a void*.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The runtime expects the ctor to return the destination pointer.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // The dtor receives the address of the thread's copy as a void*.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function: wrap the registration in a standalone global
      // init function and hand it back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1868 
/// For a 'declare target to' variable definition, emit the device-side ctor
/// and dtor helpers (or host-side placeholder globals) and register them as
/// offload entries. Returns true iff the caller should skip the regular host
/// emission (i.e. we are compiling for the device).
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do when no offloading targets are configured.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit the ctor/dtor entries only once per mangled name.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive even though nothing in the module calls it.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host only a placeholder is needed so host and device entry
      // tables stay in sync.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the dtor alive even though nothing in the module calls it.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host-side placeholder matching the device entry.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
1983 
/// Return the address of an artificial (compiler-generated) threadprivate
/// variable identified by Name, using native TLS when available and the
/// __kmpc_threadprivate_cached runtime otherwise.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  // With native TLS, mark the backing global thread_local and use it directly.
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Otherwise go through the runtime cache, keyed by a companion "...cache."
  // global; an invalid SourceLocation yields the default ident string.
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}
2014 
/// Emit code for 'if (Cond) { ThenGen } else { ElseGen }', constant-folding
/// the condition when possible so only the live arm is emitted.
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it.  Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}
2053 
// Emits the call sequence for a 'parallel' region.  When IfCond is null (or
// constant-folds to true) the runtime forks a team via __kmpc_fork_call;
// otherwise a serial fallback runs OutlinedFn on the current thread between
// __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
//
// \param OutlinedFn   The outlined parallel region (the "microtask").
// \param CapturedVars Values captured by the region, forwarded to OutlinedFn.
// \param IfCond       Condition from the 'if' clause, or null if absent.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // Parallel path: hand the microtask and all captured variables to the
  // runtime, which creates the team and invokes the outlined function.
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serial path (taken when the 'if' clause evaluates to false): call the
  // outlined function directly, bracketed by the serialized-parallel calls.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    // 'if' clause present: select between the two paths (emitIfClause may
    // fold the condition and emit only one of them).
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    // No 'if' clause: unconditionally emit the forking path.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2115 
2116 // If we're inside an (outlined) parallel region, use the region info's
2117 // thread-ID variable (it is passed in a first argument of the outlined function
2118 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2119 // regular serial code region, get thread ID by calling kmp_int32
2120 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2121 // return the address of that temp.
2122 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2123                                              SourceLocation Loc) {
2124   if (auto *OMPRegionInfo =
2125           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2126     if (OMPRegionInfo->getThreadIDVariable())
2127       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2128 
2129   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2130   QualType Int32Ty =
2131       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2132   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2133   CGF.EmitStoreOfScalar(ThreadID,
2134                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2135 
2136   return ThreadIDTemp;
2137 }
2138 
2139 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2140     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2141   SmallString<256> Buffer;
2142   llvm::raw_svector_ostream Out(Buffer);
2143   Out << Name;
2144   StringRef RuntimeName = Out.str();
2145   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2146   if (Elem.second) {
2147     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2148            "OMP internal variable has different type than requested");
2149     return &*Elem.second;
2150   }
2151 
2152   return Elem.second = new llvm::GlobalVariable(
2153              CGM.getModule(), Ty, /*IsConstant*/ false,
2154              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2155              Elem.first(), /*InsertBefore=*/nullptr,
2156              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2157 }
2158 
2159 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2160   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2161   std::string Name = getName({Prefix, "var"});
2162   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2163 }
2164 
namespace {
/// Common pre(post)-action for different OpenMP constructs.
///
/// Enter() emits a call to \c EnterCallee(EnterArgs); if \c Conditional is
/// set, the call's non-zero result guards the region body (an "omp_if.then"
/// block is opened and must later be closed via Done()).  Exit() emits a call
/// to \c ExitCallee(ExitArgs).
///
/// NOTE: EnterArgs/ExitArgs are non-owning ArrayRefs — the caller must keep
/// the underlying argument storage alive while the action is in use.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  // Continuation block created by Enter() when Conditional; closed by Done().
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      // Only execute the region body when the enter call returned non-zero.
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  // Closes the conditional region opened by Enter().  Only meaningful when
  // the action was constructed with Conditional == true (otherwise ContBlock
  // is still null).
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
2203 
2204 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2205                                          StringRef CriticalName,
2206                                          const RegionCodeGenTy &CriticalOpGen,
2207                                          SourceLocation Loc, const Expr *Hint) {
2208   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2209   // CriticalOpGen();
2210   // __kmpc_end_critical(ident_t *, gtid, Lock);
2211   // Prepare arguments and build a call to __kmpc_critical
2212   if (!CGF.HaveInsertPoint())
2213     return;
2214   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2215                          getCriticalRegionLock(CriticalName)};
2216   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2217                                                 std::end(Args));
2218   if (Hint) {
2219     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2220         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2221   }
2222   CommonActionTy Action(
2223       OMPBuilder.getOrCreateRuntimeFunction(
2224           CGM.getModule(),
2225           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2226       EnterArgs,
2227       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2228                                             OMPRTL___kmpc_end_critical),
2229       Args);
2230   CriticalOpGen.setAction(Action);
2231   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2232 }
2233 
2234 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2235                                        const RegionCodeGenTy &MasterOpGen,
2236                                        SourceLocation Loc) {
2237   if (!CGF.HaveInsertPoint())
2238     return;
2239   // if(__kmpc_master(ident_t *, gtid)) {
2240   //   MasterOpGen();
2241   //   __kmpc_end_master(ident_t *, gtid);
2242   // }
2243   // Prepare arguments and build a call to __kmpc_master
2244   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2245   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2246                             CGM.getModule(), OMPRTL___kmpc_master),
2247                         Args,
2248                         OMPBuilder.getOrCreateRuntimeFunction(
2249                             CGM.getModule(), OMPRTL___kmpc_end_master),
2250                         Args,
2251                         /*Conditional=*/true);
2252   MasterOpGen.setAction(Action);
2253   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2254   Action.Done(CGF);
2255 }
2256 
2257 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2258                                         SourceLocation Loc) {
2259   if (!CGF.HaveInsertPoint())
2260     return;
2261   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2262     OMPBuilder.CreateTaskyield(CGF.Builder);
2263   } else {
2264     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2265     llvm::Value *Args[] = {
2266         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2267         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2268     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2269                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2270                         Args);
2271   }
2272 
2273   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2274     Region->emitUntiedSwitch(CGF);
2275 }
2276 
2277 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2278                                           const RegionCodeGenTy &TaskgroupOpGen,
2279                                           SourceLocation Loc) {
2280   if (!CGF.HaveInsertPoint())
2281     return;
2282   // __kmpc_taskgroup(ident_t *, gtid);
2283   // TaskgroupOpGen();
2284   // __kmpc_end_taskgroup(ident_t *, gtid);
2285   // Prepare arguments and build a call to __kmpc_taskgroup
2286   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2287   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2288                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2289                         Args,
2290                         OMPBuilder.getOrCreateRuntimeFunction(
2291                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2292                         Args);
2293   TaskgroupOpGen.setAction(Action);
2294   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2295 }
2296 
2297 /// Given an array of pointers to variables, project the address of a
2298 /// given variable.
2299 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2300                                       unsigned Index, const VarDecl *Var) {
2301   // Pull out the pointer to the variable.
2302   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2303   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2304 
2305   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2306   Addr = CGF.Builder.CreateElementBitCast(
2307       Addr, CGF.ConvertTypeForMem(Var->getType()));
2308   return Addr;
2309 }
2310 
// Emits the internal helper
//   void omp.copyprivate.copy_func(void *LHSArg, void *RHSArg)
// passed to __kmpc_copyprivate as <copy_func>.  Both arguments really point
// to arrays of void* (of type \p ArgsType) holding the addresses of the
// copyprivate variables; the helper assigns, element-wise via
// \p AssignmentOps, from the RHS copies into the LHS copies.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  // Reinterpret the raw void* parameters as pointers to void*-arrays.
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    // Destination address comes from the LHS array, source from the RHS
    // array; each is re-typed for the corresponding variable's declaration.
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // Perform the copy using the AST-provided assignment expression, so that
    // user-defined copy assignment is honored where applicable.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2364 
// Emits a 'single' region guarded by __kmpc_single / __kmpc_end_single.
// When copyprivate clauses are present, a did_it flag records whether this
// thread executed the region, and a trailing __kmpc_copyprivate call
// broadcasts the listed variables from the executing thread to the others.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // The four copyprivate arrays are parallel: one src/dst/assignment entry
  // per copyprivate variable.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // did_it is only needed (and only emitted) for copyprivate support.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Conditional action: only the thread for which __kmpc_single returns
  // non-zero executes the body.
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    // Still inside the guarded block, so only the executing thread sets it.
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  // Close the conditional region opened by the action.
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    // Fill the list with the address of each copyprivate variable, cast to
    // void* so the runtime can treat it uniformly.
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2452 
2453 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2454                                         const RegionCodeGenTy &OrderedOpGen,
2455                                         SourceLocation Loc, bool IsThreads) {
2456   if (!CGF.HaveInsertPoint())
2457     return;
2458   // __kmpc_ordered(ident_t *, gtid);
2459   // OrderedOpGen();
2460   // __kmpc_end_ordered(ident_t *, gtid);
2461   // Prepare arguments and build a call to __kmpc_ordered
2462   if (IsThreads) {
2463     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2464     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2465                               CGM.getModule(), OMPRTL___kmpc_ordered),
2466                           Args,
2467                           OMPBuilder.getOrCreateRuntimeFunction(
2468                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2469                           Args);
2470     OrderedOpGen.setAction(Action);
2471     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2472     return;
2473   }
2474   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2475 }
2476 
2477 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2478   unsigned Flags;
2479   if (Kind == OMPD_for)
2480     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2481   else if (Kind == OMPD_sections)
2482     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2483   else if (Kind == OMPD_single)
2484     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2485   else if (Kind == OMPD_barrier)
2486     Flags = OMP_IDENT_BARRIER_EXPL;
2487   else
2488     Flags = OMP_IDENT_BARRIER_IMPL;
2489   return Flags;
2490 }
2491 
2492 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2493     CodeGenFunction &CGF, const OMPLoopDirective &S,
2494     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2495   // Check if the loop directive is actually a doacross loop directive. In this
2496   // case choose static, 1 schedule.
2497   if (llvm::any_of(
2498           S.getClausesOfKind<OMPOrderedClause>(),
2499           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2500     ScheduleKind = OMPC_SCHEDULE_static;
2501     // Chunk size is 1 in this case.
2502     llvm::APInt ChunkSize(32, 1);
2503     ChunkExpr = IntegerLiteral::Create(
2504         CGF.getContext(), ChunkSize,
2505         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2506         SourceLocation());
2507   }
2508 }
2509 
// Emits a barrier at the given location.  With the OpenMPIRBuilder enabled,
// emission is delegated to the builder.  Otherwise this emits
// __kmpc_barrier, or — inside a cancellable region when ForceSimpleCall is
// false — __kmpc_cancel_barrier, optionally followed by a check that exits
// the construct when cancellation was observed (EmitChecks).
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.CreateBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  // The ident_t flags record which construct the barrier belongs to.
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      // Cancellable region: use the cancellation-aware barrier, whose result
      // tells whether cancellation has been requested.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        // Branch through any pending cleanups to the construct's cancel
        // destination.
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  // Default: plain (non-cancellable) barrier.
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2559 
2560 /// Map the OpenMP loop schedule to the runtime enumeration.
2561 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2562                                           bool Chunked, bool Ordered) {
2563   switch (ScheduleKind) {
2564   case OMPC_SCHEDULE_static:
2565     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2566                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2567   case OMPC_SCHEDULE_dynamic:
2568     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2569   case OMPC_SCHEDULE_guided:
2570     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2571   case OMPC_SCHEDULE_runtime:
2572     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2573   case OMPC_SCHEDULE_auto:
2574     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2575   case OMPC_SCHEDULE_unknown:
2576     assert(!Chunked && "chunk was specified but schedule kind not known");
2577     return Ordered ? OMP_ord_static : OMP_sch_static;
2578   }
2579   llvm_unreachable("Unexpected runtime schedule");
2580 }
2581 
2582 /// Map the OpenMP distribute schedule to the runtime enumeration.
2583 static OpenMPSchedType
2584 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2585   // only static is allowed for dist_schedule
2586   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2587 }
2588 
2589 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2590                                          bool Chunked) const {
2591   OpenMPSchedType Schedule =
2592       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2593   return Schedule == OMP_sch_static;
2594 }
2595 
2596 bool CGOpenMPRuntime::isStaticNonchunked(
2597     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2598   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2599   return Schedule == OMP_dist_sch_static;
2600 }
2601 
2602 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2603                                       bool Chunked) const {
2604   OpenMPSchedType Schedule =
2605       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2606   return Schedule == OMP_sch_static_chunked;
2607 }
2608 
2609 bool CGOpenMPRuntime::isStaticChunked(
2610     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2611   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2612   return Schedule == OMP_dist_sch_static_chunked;
2613 }
2614 
2615 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2616   OpenMPSchedType Schedule =
2617       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2618   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2619   return Schedule != OMP_sch_static;
2620 }
2621 
2622 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2623                                   OpenMPScheduleClauseModifier M1,
2624                                   OpenMPScheduleClauseModifier M2) {
2625   int Modifier = 0;
2626   switch (M1) {
2627   case OMPC_SCHEDULE_MODIFIER_monotonic:
2628     Modifier = OMP_sch_modifier_monotonic;
2629     break;
2630   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2631     Modifier = OMP_sch_modifier_nonmonotonic;
2632     break;
2633   case OMPC_SCHEDULE_MODIFIER_simd:
2634     if (Schedule == OMP_sch_static_chunked)
2635       Schedule = OMP_sch_static_balanced_chunked;
2636     break;
2637   case OMPC_SCHEDULE_MODIFIER_last:
2638   case OMPC_SCHEDULE_MODIFIER_unknown:
2639     break;
2640   }
2641   switch (M2) {
2642   case OMPC_SCHEDULE_MODIFIER_monotonic:
2643     Modifier = OMP_sch_modifier_monotonic;
2644     break;
2645   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2646     Modifier = OMP_sch_modifier_nonmonotonic;
2647     break;
2648   case OMPC_SCHEDULE_MODIFIER_simd:
2649     if (Schedule == OMP_sch_static_chunked)
2650       Schedule = OMP_sch_static_balanced_chunked;
2651     break;
2652   case OMPC_SCHEDULE_MODIFIER_last:
2653   case OMPC_SCHEDULE_MODIFIER_unknown:
2654     break;
2655   }
2656   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2657   // If the static schedule kind is specified or if the ordered clause is
2658   // specified, and if the nonmonotonic modifier is not specified, the effect is
2659   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2660   // modifier is specified, the effect is as if the nonmonotonic modifier is
2661   // specified.
2662   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2663     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2664           Schedule == OMP_sch_static_balanced_chunked ||
2665           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2666           Schedule == OMP_dist_sch_static_chunked ||
2667           Schedule == OMP_dist_sch_static))
2668       Modifier = OMP_sch_modifier_nonmonotonic;
2669   }
2670   return Schedule | Modifier;
2671 }
2672 
// Emits the __kmpc_dispatch_init call that starts a dynamically scheduled
// worksharing loop.  IVSize/IVSigned select the 32/64-bit (un)signed runtime
// entry; DispatchValues carries the loop bounds and optional chunk.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Static schedules take the for_static_init path instead; only ordered
  // static is dispatched here.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
2705 
// Emits the __kmpc_for_static_init call for a statically scheduled
// worksharing loop.  The runtime fills in the last-iteration flag and the
// lower/upper/stride locations passed through Values; the schedule integer
// combines the base schedule with any monotonicity modifiers.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  // Only static schedules reach this path; ordered loops go through the
  // dispatch-init machinery instead.
  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
2754 
2755 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2756                                         SourceLocation Loc,
2757                                         OpenMPDirectiveKind DKind,
2758                                         const OpenMPScheduleTy &ScheduleKind,
2759                                         const StaticRTInput &Values) {
2760   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2761       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2762   assert(isOpenMPWorksharingDirective(DKind) &&
2763          "Expected loop-based or sections-based directive.");
2764   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2765                                              isOpenMPLoopDirective(DKind)
2766                                                  ? OMP_IDENT_WORK_LOOP
2767                                                  : OMP_IDENT_WORK_SECTIONS);
2768   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2769   llvm::FunctionCallee StaticInitFunction =
2770       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2771   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2772   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2773                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2774 }
2775 
2776 void CGOpenMPRuntime::emitDistributeStaticInit(
2777     CodeGenFunction &CGF, SourceLocation Loc,
2778     OpenMPDistScheduleClauseKind SchedKind,
2779     const CGOpenMPRuntime::StaticRTInput &Values) {
2780   OpenMPSchedType ScheduleNum =
2781       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2782   llvm::Value *UpdatedLocation =
2783       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2784   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2785   llvm::FunctionCallee StaticInitFunction =
2786       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2787   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2788                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2789                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2790 }
2791 
2792 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2793                                           SourceLocation Loc,
2794                                           OpenMPDirectiveKind DKind) {
2795   if (!CGF.HaveInsertPoint())
2796     return;
2797   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2798   llvm::Value *Args[] = {
2799       emitUpdateLocation(CGF, Loc,
2800                          isOpenMPDistributeDirective(DKind)
2801                              ? OMP_IDENT_WORK_DISTRIBUTE
2802                              : isOpenMPLoopDirective(DKind)
2803                                    ? OMP_IDENT_WORK_LOOP
2804                                    : OMP_IDENT_WORK_SECTIONS),
2805       getThreadID(CGF, Loc)};
2806   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2807   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2808                           CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2809                       Args);
2810 }
2811 
2812 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2813                                                  SourceLocation Loc,
2814                                                  unsigned IVSize,
2815                                                  bool IVSigned) {
2816   if (!CGF.HaveInsertPoint())
2817     return;
2818   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2819   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2820   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2821 }
2822 
2823 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2824                                           SourceLocation Loc, unsigned IVSize,
2825                                           bool IVSigned, Address IL,
2826                                           Address LB, Address UB,
2827                                           Address ST) {
2828   // Call __kmpc_dispatch_next(
2829   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2830   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2831   //          kmp_int[32|64] *p_stride);
2832   llvm::Value *Args[] = {
2833       emitUpdateLocation(CGF, Loc),
2834       getThreadID(CGF, Loc),
2835       IL.getPointer(), // &isLastIter
2836       LB.getPointer(), // &Lower
2837       UB.getPointer(), // &Upper
2838       ST.getPointer()  // &Stride
2839   };
2840   llvm::Value *Call =
2841       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2842   return CGF.EmitScalarConversion(
2843       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2844       CGF.getContext().BoolTy, Loc);
2845 }
2846 
2847 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2848                                            llvm::Value *NumThreads,
2849                                            SourceLocation Loc) {
2850   if (!CGF.HaveInsertPoint())
2851     return;
2852   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2853   llvm::Value *Args[] = {
2854       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2855       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2856   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2857                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2858                       Args);
2859 }
2860 
2861 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2862                                          ProcBindKind ProcBind,
2863                                          SourceLocation Loc) {
2864   if (!CGF.HaveInsertPoint())
2865     return;
2866   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2867   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2868   llvm::Value *Args[] = {
2869       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2870       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2871   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2872                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2873                       Args);
2874 }
2875 
2876 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2877                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2878   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2879     OMPBuilder.CreateFlush(CGF.Builder);
2880   } else {
2881     if (!CGF.HaveInsertPoint())
2882       return;
2883     // Build call void __kmpc_flush(ident_t *loc)
2884     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2885                             CGM.getModule(), OMPRTL___kmpc_flush),
2886                         emitUpdateLocation(CGF, Loc));
2887   }
2888 }
2889 
namespace {
/// Indexes of fields for type kmp_task_t.
///
/// These indexes must stay in sync with the field order used when the
/// kmp_task_t record is built (see createKmpTaskTRecordDecl below). The
/// taskloop-only fields exist only in task records generated for
/// taskloop-based directives.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  /// (First kmp_cmplrdata_t union field.)
  Data1,
  /// Task priority. (Second kmp_cmplrdata_t union field.)
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2915 
2916 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2917   return OffloadEntriesTargetRegion.empty() &&
2918          OffloadEntriesDeviceGlobalVar.empty();
2919 }
2920 
2921 /// Initialize target region entry.
2922 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2923     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2924                                     StringRef ParentName, unsigned LineNum,
2925                                     unsigned Order) {
2926   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2927                                              "only required for the device "
2928                                              "code generation.");
2929   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2930       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2931                                    OMPTargetRegionEntryTargetRegion);
2932   ++OffloadingEntriesNum;
2933 }
2934 
2935 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2936     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2937                                   StringRef ParentName, unsigned LineNum,
2938                                   llvm::Constant *Addr, llvm::Constant *ID,
2939                                   OMPTargetRegionEntryKind Flags) {
2940   // If we are emitting code for a target, the entry is already initialized,
2941   // only has to be registered.
2942   if (CGM.getLangOpts().OpenMPIsDevice) {
2943     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
2944       unsigned DiagID = CGM.getDiags().getCustomDiagID(
2945           DiagnosticsEngine::Error,
2946           "Unable to find target region on line '%0' in the device code.");
2947       CGM.getDiags().Report(DiagID) << LineNum;
2948       return;
2949     }
2950     auto &Entry =
2951         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
2952     assert(Entry.isValid() && "Entry not initialized!");
2953     Entry.setAddress(Addr);
2954     Entry.setID(ID);
2955     Entry.setFlags(Flags);
2956   } else {
2957     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
2958     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
2959     ++OffloadingEntriesNum;
2960   }
2961 }
2962 
2963 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
2964     unsigned DeviceID, unsigned FileID, StringRef ParentName,
2965     unsigned LineNum) const {
2966   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
2967   if (PerDevice == OffloadEntriesTargetRegion.end())
2968     return false;
2969   auto PerFile = PerDevice->second.find(FileID);
2970   if (PerFile == PerDevice->second.end())
2971     return false;
2972   auto PerParentName = PerFile->second.find(ParentName);
2973   if (PerParentName == PerFile->second.end())
2974     return false;
2975   auto PerLine = PerParentName->second.find(LineNum);
2976   if (PerLine == PerParentName->second.end())
2977     return false;
2978   // Fail if this entry is already registered.
2979   if (PerLine->second.getAddress() || PerLine->second.getID())
2980     return false;
2981   return true;
2982 }
2983 
2984 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
2985     const OffloadTargetRegionEntryInfoActTy &Action) {
2986   // Scan all target region entries and perform the provided action.
2987   for (const auto &D : OffloadEntriesTargetRegion)
2988     for (const auto &F : D.second)
2989       for (const auto &P : F.second)
2990         for (const auto &L : P.second)
2991           Action(D.first, F.first, P.first(), L.first, L.second);
2992 }
2993 
2994 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2995     initializeDeviceGlobalVarEntryInfo(StringRef Name,
2996                                        OMPTargetGlobalVarEntryKind Flags,
2997                                        unsigned Order) {
2998   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2999                                              "only required for the device "
3000                                              "code generation.");
3001   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3002   ++OffloadingEntriesNum;
3003 }
3004 
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // Device codegen: the entry was pre-initialized (from the host IR
    // metadata); only its address, size and linkage still have to be set.
    // NOTE: operator[] would create a fresh (invalid) entry if the name was
    // never initialized — the asserts below catch that in debug builds.
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      // Already registered: only upgrade an entry that was recorded with a
      // zero size (i.e. from a declaration) with the real size and linkage.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    // Host codegen: entries are created on first registration.
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      // Same size/linkage upgrade as on the device path.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}
3044 
3045 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3046     actOnDeviceGlobalVarEntriesInfo(
3047         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3048   // Scan all target region entries and perform the provided action.
3049   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3050     Action(E.getKey(), E.getValue());
3051 }
3052 
3053 void CGOpenMPRuntime::createOffloadEntry(
3054     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3055     llvm::GlobalValue::LinkageTypes Linkage) {
3056   StringRef Name = Addr->getName();
3057   llvm::Module &M = CGM.getModule();
3058   llvm::LLVMContext &C = M.getContext();
3059 
3060   // Create constant string with the name.
3061   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3062 
3063   std::string StringName = getName({"omp_offloading", "entry_name"});
3064   auto *Str = new llvm::GlobalVariable(
3065       M, StrPtrInit->getType(), /*isConstant=*/true,
3066       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3067   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3068 
3069   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
3070                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
3071                             llvm::ConstantInt::get(CGM.SizeTy, Size),
3072                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3073                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3074   std::string EntryName = getName({"omp_offloading", "entry", ""});
3075   llvm::GlobalVariable *Entry = createGlobalStruct(
3076       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3077       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3078 
3079   // The entry has to be created in the section the linker expects it to be.
3080   Entry->setSection("omp_offloading_entries");
3081 }
3082 
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for function that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries indexed by their creation order; the lambdas below fill the
  // vectors in via OffloadEntryInfo::getOrder().
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Recover a SourceLocation for diagnostics by searching the source
        // manager's file table for the file with this (device, file) unique
        // ID pair.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Now emit a __tgt_offload_entry global per collected entry, in creation
  // order, diagnosing entries that were never completed.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // With unified shared memory on the device, 'to' variables need no
        // entry.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // 'link' variables have an address only on the host side.
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3256 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  // Only device compilations consume host IR metadata.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  // Nothing to load if no host IR file was supplied on the command line.
  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a throw-away context; only the named metadata is
  // read from it.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Operand accessors; the operand layout must match the layout produced
    // by createOffloadEntriesAndInfoMetadata().
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; the remaining operands depend on it.
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
3325 
3326 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3327   if (!KmpRoutineEntryPtrTy) {
3328     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3329     ASTContext &C = CGM.getContext();
3330     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3331     FunctionProtoType::ExtProtoInfo EPI;
3332     KmpRoutineEntryPtrQTy = C.getPointerType(
3333         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3334     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3335   }
3336 }
3337 
3338 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3339   // Make sure the type of the entry is already created. This is the type we
3340   // have to create:
3341   // struct __tgt_offload_entry{
3342   //   void      *addr;       // Pointer to the offload entry info.
3343   //                          // (function or global)
3344   //   char      *name;       // Name of the function or global.
3345   //   size_t     size;       // Size of the entry info (0 if it a function).
3346   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3347   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3348   // };
3349   if (TgtOffloadEntryQTy.isNull()) {
3350     ASTContext &C = CGM.getContext();
3351     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3352     RD->startDefinition();
3353     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3354     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3355     addFieldToRecordDecl(C, RD, C.getSizeType());
3356     addFieldToRecordDecl(
3357         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3358     addFieldToRecordDecl(
3359         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3360     RD->completeDefinition();
3361     RD->addAttr(PackedAttr::CreateImplicit(C));
3362     TgtOffloadEntryQTy = C.getRecordType(RD);
3363   }
3364   return TgtOffloadEntryQTy;
3365 }
3366 
namespace {
/// Describes one privatized variable of a task: the original variable and its
/// reference expression, the generated private copy, and the variable used to
/// initialize individual elements (if any).
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  /// Constructor for a local variable privatized without clause-generated
  /// copies; only the original declaration is tracked.
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
  /// True when this entry was built with the single-VarDecl constructor,
  /// i.e. a local variable with no clause-provided copy/initializer.
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
/// Pair of the required alignment and the descriptor of one private datum.
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace
3384 
3385 static RecordDecl *
3386 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3387   if (!Privates.empty()) {
3388     ASTContext &C = CGM.getContext();
3389     // Build struct .kmp_privates_t. {
3390     //         /*  private vars  */
3391     //       };
3392     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3393     RD->startDefinition();
3394     for (const auto &Pair : Privates) {
3395       const VarDecl *VD = Pair.second.Original;
3396       QualType Type = VD->getType().getNonReferenceType();
3397       // If the private variable is a local variable with lvalue ref type,
3398       // allocate the pointer instead of the pointee type.
3399       if (Pair.second.isLocalPrivate() &&
3400           VD->getType()->isLValueReferenceType())
3401         Type = C.getPointerType(Type);
3402       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3403       if (VD->hasAttrs()) {
3404         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3405              E(VD->getAttrs().end());
3406              I != E; ++I)
3407           FD->addAttr(*I);
3408       }
3409     }
3410     RD->completeDefinition();
3411     return RD;
3412   }
3413   return nullptr;
3414 }
3415 
3416 static RecordDecl *
3417 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3418                          QualType KmpInt32Ty,
3419                          QualType KmpRoutineEntryPointerQTy) {
3420   ASTContext &C = CGM.getContext();
3421   // Build struct kmp_task_t {
3422   //         void *              shareds;
3423   //         kmp_routine_entry_t routine;
3424   //         kmp_int32           part_id;
3425   //         kmp_cmplrdata_t data1;
3426   //         kmp_cmplrdata_t data2;
3427   // For taskloops additional fields:
3428   //         kmp_uint64          lb;
3429   //         kmp_uint64          ub;
3430   //         kmp_int64           st;
3431   //         kmp_int32           liter;
3432   //         void *              reductions;
3433   //       };
3434   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3435   UD->startDefinition();
3436   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3437   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3438   UD->completeDefinition();
3439   QualType KmpCmplrdataTy = C.getRecordType(UD);
3440   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3441   RD->startDefinition();
3442   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3443   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3444   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3445   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3446   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3447   if (isOpenMPTaskLoopDirective(Kind)) {
3448     QualType KmpUInt64Ty =
3449         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3450     QualType KmpInt64Ty =
3451         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3452     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3453     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3454     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3455     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3456     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3457   }
3458   RD->completeDefinition();
3459   return RD;
3460 }
3461 
3462 static RecordDecl *
3463 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3464                                      ArrayRef<PrivateDataTy> Privates) {
3465   ASTContext &C = CGM.getContext();
3466   // Build struct kmp_task_t_with_privates {
3467   //         kmp_task_t task_data;
3468   //         .kmp_privates_t. privates;
3469   //       };
3470   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3471   RD->startDefinition();
3472   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3473   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3474     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3475   RD->completeDefinition();
3476   return RD;
3477 }
3478 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Signature matches the runtime's task entry callback:
  // (kmp_int32 gtid, kmp_task_t_with_privates *restrict tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // First field of kmp_task_t_with_privates is the kmp_task_t task_data.
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address so the task function can update it.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load the shareds pointer and cast it to the expected shareds struct type.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field (second field of the wrapper) exists only when
  // something was privatized; otherwise pass a null pointer.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Arguments common to task and taskloop entries.
  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloop entries additionally receive lb/ub/st/liter/reductions loaded
  // from the task descriptor.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The runtime expects the entry to return 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3593 
/// Emit the task destructor thunk invoked by the runtime before the task
/// descriptor is freed. It walks the fields of the privates record (the
/// second field of kmp_task_t_with_privates) and pushes a destroy cleanup for
/// every field whose type requires destruction.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  // Signature matches the runtime's destructors callback:
  // (kmp_int32 gtid, kmp_task_t_with_privates *restrict tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Skip the leading kmp_task_t field; the privates record comes second.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    // Push a cleanup (rather than emit inline) so destruction ordering and
    // EH behavior follow the normal CodeGenFunction cleanup machinery.
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3642 
3643 /// Emit a privates mapping function for correct handling of private and
3644 /// firstprivate variables.
3645 /// \code
3646 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3647 /// **noalias priv1,...,  <tyn> **noalias privn) {
3648 ///   *priv1 = &.privates.priv1;
3649 ///   ...;
3650 ///   *privn = &.privates.privn;
3651 /// }
3652 /// \endcode
3653 static llvm::Value *
3654 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3655                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3656                                ArrayRef<PrivateDataTy> Privates) {
3657   ASTContext &C = CGM.getContext();
3658   FunctionArgList Args;
3659   ImplicitParamDecl TaskPrivatesArg(
3660       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3661       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3662       ImplicitParamDecl::Other);
3663   Args.push_back(&TaskPrivatesArg);
3664   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3665   unsigned Counter = 1;
3666   for (const Expr *E : Data.PrivateVars) {
3667     Args.push_back(ImplicitParamDecl::Create(
3668         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3669         C.getPointerType(C.getPointerType(E->getType()))
3670             .withConst()
3671             .withRestrict(),
3672         ImplicitParamDecl::Other));
3673     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3674     PrivateVarsPos[VD] = Counter;
3675     ++Counter;
3676   }
3677   for (const Expr *E : Data.FirstprivateVars) {
3678     Args.push_back(ImplicitParamDecl::Create(
3679         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3680         C.getPointerType(C.getPointerType(E->getType()))
3681             .withConst()
3682             .withRestrict(),
3683         ImplicitParamDecl::Other));
3684     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3685     PrivateVarsPos[VD] = Counter;
3686     ++Counter;
3687   }
3688   for (const Expr *E : Data.LastprivateVars) {
3689     Args.push_back(ImplicitParamDecl::Create(
3690         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3691         C.getPointerType(C.getPointerType(E->getType()))
3692             .withConst()
3693             .withRestrict(),
3694         ImplicitParamDecl::Other));
3695     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3696     PrivateVarsPos[VD] = Counter;
3697     ++Counter;
3698   }
3699   for (const VarDecl *VD : Data.PrivateLocals) {
3700     QualType Ty = VD->getType().getNonReferenceType();
3701     if (VD->getType()->isLValueReferenceType())
3702       Ty = C.getPointerType(Ty);
3703     Args.push_back(ImplicitParamDecl::Create(
3704         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3705         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3706         ImplicitParamDecl::Other));
3707     PrivateVarsPos[VD] = Counter;
3708     ++Counter;
3709   }
3710   const auto &TaskPrivatesMapFnInfo =
3711       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3712   llvm::FunctionType *TaskPrivatesMapTy =
3713       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3714   std::string Name =
3715       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3716   auto *TaskPrivatesMap = llvm::Function::Create(
3717       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3718       &CGM.getModule());
3719   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3720                                     TaskPrivatesMapFnInfo);
3721   if (CGM.getLangOpts().Optimize) {
3722     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3723     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3724     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3725   }
3726   CodeGenFunction CGF(CGM);
3727   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3728                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3729 
3730   // *privi = &.privates.privi;
3731   LValue Base = CGF.EmitLoadOfPointerLValue(
3732       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3733       TaskPrivatesArg.getType()->castAs<PointerType>());
3734   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3735   Counter = 0;
3736   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3737     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3738     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3739     LValue RefLVal =
3740         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3741     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3742         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3743     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3744     ++Counter;
3745   }
3746   CGF.FinishFunction();
3747   return TaskPrivatesMap;
3748 }
3749 
/// Emit initialization for private variables in task-based directives.
///
/// \param KmpTaskSharedsPtr Pointer to the shareds block of the source task;
///        may be invalid when no firstprivate copies from shareds are needed.
/// \param TDBase LValue of the destination kmp_task_t_with_privates object.
/// \param ForDup True when emitting the body of the task duplication function
///        (taskloop), where only nontrivial constructor initializers are run.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the privates-record fields in lockstep with the Privates array.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate())
      continue;
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the duplication function (ForDup) only nontrivial constructor
    // initializers need to run; everything else was copied already.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        // Firstprivate: initialize the copy from the original value.
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the original value out of the source task's shareds block.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          // Variables captured by an enclosing lambda or block are emitted
          // through the normal capture machinery.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/class firstprivate: bind the init element to the shared
          // value and run the initializer expression.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private: run the default initializer, if any.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3869 
3870 /// Check if duplication function is required for taskloops.
3871 static bool checkInitIsRequired(CodeGenFunction &CGF,
3872                                 ArrayRef<PrivateDataTy> Privates) {
3873   bool InitRequired = false;
3874   for (const PrivateDataTy &Pair : Privates) {
3875     if (Pair.second.isLocalPrivate())
3876       continue;
3877     const VarDecl *VD = Pair.second.PrivateCopy;
3878     const Expr *Init = VD->getAnyInitializer();
3879     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3880                                     !CGF.isTrivialInitializer(Init));
3881     if (InitRequired)
3882       break;
3883   }
3884   return InitRequired;
3885 }
3886 
3887 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Signature matches the runtime's task_dup callback:
  // (kmp_task_t *dst, kmp_task_t *src, int lastpriv).
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Load the shareds pointer from the *source* task so firstprivate copies
    // are initialized from the original task's data.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
3966 
3967 /// Checks if destructor function is required to be generated.
3968 /// \return true if cleanups are required, false otherwise.
3969 static bool
3970 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3971                          ArrayRef<PrivateDataTy> Privates) {
3972   for (const PrivateDataTy &P : Privates) {
3973     if (P.second.isLocalPrivate())
3974       continue;
3975     QualType Ty = P.second.Original->getType().getNonReferenceType();
3976     if (Ty.isDestructedType())
3977       return true;
3978   }
3979   return false;
3980 }
3981 
namespace {
/// Loop generator for OpenMP iterator expression.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator continuation (loop header) destinations; branched to from
  // the destructor to close each loop.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  // Per-iterator exit destinations taken when a counter reaches its bound.
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// Opens one counting loop per iterator in \p E: privatizes the iterator
  /// variables and their counters, zeroes the counters and emits the loop
  /// headers. Code emitted while this scope is alive becomes the innermost
  /// loop body; the matching latches/exits are emitted by the destructor.
  /// A null \p E makes the scope a no-op.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      // Evaluate all upper bounds up front, before any loop is opened.
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Pick a signed or unsigned compare to match the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Closes the loop nest opened by the constructor, innermost loop first:
  /// advances each counter, branches back to the loop header and emits the
  /// corresponding exit block.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4060 
4061 static std::pair<llvm::Value *, llvm::Value *>
4062 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4063   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4064   llvm::Value *Addr;
4065   if (OASE) {
4066     const Expr *Base = OASE->getBase();
4067     Addr = CGF.EmitScalarExpr(Base);
4068   } else {
4069     Addr = CGF.EmitLValue(E).getPointer(CGF);
4070   }
4071   llvm::Value *SizeVal;
4072   QualType Ty = E->getType();
4073   if (OASE) {
4074     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4075     for (const Expr *SE : OASE->getDimensions()) {
4076       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4077       Sz = CGF.EmitScalarConversion(
4078           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4079       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4080     }
4081   } else if (const auto *ASE =
4082                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4083     LValue UpAddrLVal =
4084         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4085     llvm::Value *UpAddr =
4086         CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
4087     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4088     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4089     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4090   } else {
4091     SizeVal = CGF.getTypeSize(Ty);
4092   }
4093   return std::make_pair(Addr, SizeVal);
4094 }
4095 
/// Builds the kmp_task_affinity_info_t record type, if it is not built yet,
/// and caches it in \p KmpTaskAffinityInfoTy. The layout created here is
/// { intptr_t base_addr; size_t len; uint32_t flags; }.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  // Flags field is a 32-bit unsigned integer.
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}
4110 
/// Allocates a new task object via __kmpc_omp_task_alloc (or
/// __kmpc_omp_target_task_alloc for 'nowait' tasks) and initializes all of
/// its parts: shareds, initial values for private copies, detach event,
/// affinity data, destructor thunk and priority. Returns the handles the
/// caller needs to actually enqueue the task.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally record the variable used for the initializer.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals)
    Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  // Largest alignment first; stable so equally-aligned privates keep their
  // source order.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // The record layout is cached separately for taskloop-family directives and
  // for plain/target task directives.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-mapping function is passed as the 4th parameter of
  // TaskFunction; match that parameter's pointer type.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates - pass a null map.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    // Ask the runtime to run destructors only if some private copy actually
    // requires them.
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The 'final' clause may be a runtime condition; fold it into the flags
  // with a select, otherwise use the statically known value.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // 'nowait' tasks are allocated with __kmpc_omp_target_task_alloc, which
    // takes an extra device ID argument.
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    // Convert the returned void* to the event handler's declared type.
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        // Iterator modifier: the element count is the product of the
        // iterators' upper bounds, known only at runtime.
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    // A runtime element count requires a VLA; otherwise a constant-sized
    // array temp suffices.
    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      // Clauses with iterators append at a runtime position; seed the counter
      // with the number of elements already filled in.
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  // Cast the opaque pointer returned by the runtime to the task-with-privates
  // record type.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops may duplicate the task; build the duplication helper when
    // lastprivates are present or some private copy needs initialization.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4496 
namespace {
/// Dependence kind for RTL.
/// The numeric values are stored verbatim into the 'flags' field of
/// kmp_depend_info (see emitDependData) and are consumed by the OpenMP
/// runtime.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
/// Order must match the field order built in getDependTypes:
/// base address, length in bytes, dependence flags.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4507 
4508 /// Translates internal dependency kind into the runtime kind.
4509 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4510   RTLDependenceKindTy DepKind;
4511   switch (K) {
4512   case OMPC_DEPEND_in:
4513     DepKind = DepIn;
4514     break;
4515   // Out and InOut dependencies must use the same code.
4516   case OMPC_DEPEND_out:
4517   case OMPC_DEPEND_inout:
4518     DepKind = DepInOut;
4519     break;
4520   case OMPC_DEPEND_mutexinoutset:
4521     DepKind = DepMutexInOutSet;
4522     break;
4523   case OMPC_DEPEND_source:
4524   case OMPC_DEPEND_sink:
4525   case OMPC_DEPEND_depobj:
4526   case OMPC_DEPEND_unknown:
4527     llvm_unreachable("Unknown task dependence type");
4528   }
4529   return DepKind;
4530 }
4531 
4532 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4533 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4534                            QualType &FlagsTy) {
4535   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4536   if (KmpDependInfoTy.isNull()) {
4537     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4538     KmpDependInfoRD->startDefinition();
4539     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4540     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4541     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4542     KmpDependInfoRD->completeDefinition();
4543     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4544   }
4545 }
4546 
4547 std::pair<llvm::Value *, LValue>
4548 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4549                                    SourceLocation Loc) {
4550   ASTContext &C = CGM.getContext();
4551   QualType FlagsTy;
4552   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4553   RecordDecl *KmpDependInfoRD =
4554       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4555   LValue Base = CGF.EmitLoadOfPointerLValue(
4556       DepobjLVal.getAddress(CGF),
4557       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4558   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4559   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4560           Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
4561   Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4562                             Base.getTBAAInfo());
4563   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4564       Addr.getPointer(),
4565       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4566   LValue NumDepsBase = CGF.MakeAddrLValue(
4567       Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4568       Base.getBaseInfo(), Base.getTBAAInfo());
4569   // NumDeps = deps[i].base_addr;
4570   LValue BaseAddrLVal = CGF.EmitLValueForField(
4571       NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4572   llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4573   return std::make_pair(NumDeps, Base);
4574 }
4575 
4576 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4577                            llvm::PointerUnion<unsigned *, LValue *> Pos,
4578                            const OMPTaskDataTy::DependData &Data,
4579                            Address DependenciesArray) {
4580   CodeGenModule &CGM = CGF.CGM;
4581   ASTContext &C = CGM.getContext();
4582   QualType FlagsTy;
4583   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4584   RecordDecl *KmpDependInfoRD =
4585       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4586   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4587 
4588   OMPIteratorGeneratorScope IteratorScope(
4589       CGF, cast_or_null<OMPIteratorExpr>(
4590                Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4591                                  : nullptr));
4592   for (const Expr *E : Data.DepExprs) {
4593     llvm::Value *Addr;
4594     llvm::Value *Size;
4595     std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4596     LValue Base;
4597     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4598       Base = CGF.MakeAddrLValue(
4599           CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4600     } else {
4601       LValue &PosLVal = *Pos.get<LValue *>();
4602       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4603       Base = CGF.MakeAddrLValue(
4604           Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
4605                   DependenciesArray.getAlignment()),
4606           KmpDependInfoTy);
4607     }
4608     // deps[i].base_addr = &<Dependencies[i].second>;
4609     LValue BaseAddrLVal = CGF.EmitLValueForField(
4610         Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4611     CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4612                           BaseAddrLVal);
4613     // deps[i].len = sizeof(<Dependencies[i].second>);
4614     LValue LenLVal = CGF.EmitLValueForField(
4615         Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4616     CGF.EmitStoreOfScalar(Size, LenLVal);
4617     // deps[i].flags = <Dependencies[i].first>;
4618     RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4619     LValue FlagsLVal = CGF.EmitLValueForField(
4620         Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4621     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4622                           FlagsLVal);
4623     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4624       ++(*P);
4625     } else {
4626       LValue &PosLVal = *Pos.get<LValue *>();
4627       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4628       Idx = CGF.Builder.CreateNUWAdd(Idx,
4629                                      llvm::ConstantInt::get(Idx->getType(), 1));
4630       CGF.EmitStoreOfScalar(Idx, PosLVal);
4631     }
4632   }
4633 }
4634 
/// For each depobj expression in \p Data, computes the number of
/// kmp_depend_info elements stored in that depobj and returns the counts as
/// size_t values, one per dependency expression.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    // Wrap the reads in the iterator loops (if the clause has an iterator
    // modifier); the scope is a no-op for a null expression.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      // Load the void* stored in the depobj variable and reinterpret it as a
      // pointer to the kmp_depend_info array.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // The element count is stored in the base_addr field of the element at
      // index -1, just before the dependency array itself.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Accumulate the count into a zero-initialized temporary; the final
      // value is read back after the iterator loops are closed.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Read the accumulated sizes outside the iterator scope.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4692 
/// Copies the previously emitted dependency records referenced by the depobj
/// expressions in \p Data into \p DependenciesArray, starting at the element
/// index currently stored in \p PosLVal; the index is advanced past the
/// copied elements.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  // Size in bytes of one kmp_depend_info record.
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    // Make iterator variables (if any) available to the depobj expressions.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      // The depobj variable holds a pointer to the first kmp_depend_info
      // record of its dependency array.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj. The count is stored in the
      // base_addr field of the record located immediately *before* the first
      // dependency record (written by emitDepobjDependClause).
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data: NumDeps records of ElSize bytes each.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += number of copied elements (NumDeps, not the byte size);
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4753 
4754 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4755     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4756     SourceLocation Loc) {
4757   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4758         return D.DepExprs.empty();
4759       }))
4760     return std::make_pair(nullptr, Address::invalid());
4761   // Process list of dependencies.
4762   ASTContext &C = CGM.getContext();
4763   Address DependenciesArray = Address::invalid();
4764   llvm::Value *NumOfElements = nullptr;
4765   unsigned NumDependencies = std::accumulate(
4766       Dependencies.begin(), Dependencies.end(), 0,
4767       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4768         return D.DepKind == OMPC_DEPEND_depobj
4769                    ? V
4770                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4771       });
4772   QualType FlagsTy;
4773   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4774   bool HasDepobjDeps = false;
4775   bool HasRegularWithIterators = false;
4776   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4777   llvm::Value *NumOfRegularWithIterators =
4778       llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4779   // Calculate number of depobj dependecies and regular deps with the iterators.
4780   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4781     if (D.DepKind == OMPC_DEPEND_depobj) {
4782       SmallVector<llvm::Value *, 4> Sizes =
4783           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4784       for (llvm::Value *Size : Sizes) {
4785         NumOfDepobjElements =
4786             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4787       }
4788       HasDepobjDeps = true;
4789       continue;
4790     }
4791     // Include number of iterations, if any.
4792     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4793       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4794         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4795         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4796         NumOfRegularWithIterators =
4797             CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4798       }
4799       HasRegularWithIterators = true;
4800       continue;
4801     }
4802   }
4803 
4804   QualType KmpDependInfoArrayTy;
4805   if (HasDepobjDeps || HasRegularWithIterators) {
4806     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4807                                            /*isSigned=*/false);
4808     if (HasDepobjDeps) {
4809       NumOfElements =
4810           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4811     }
4812     if (HasRegularWithIterators) {
4813       NumOfElements =
4814           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4815     }
4816     OpaqueValueExpr OVE(Loc,
4817                         C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4818                         VK_RValue);
4819     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4820                                                   RValue::get(NumOfElements));
4821     KmpDependInfoArrayTy =
4822         C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
4823                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4824     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4825     // Properly emit variable-sized array.
4826     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4827                                          ImplicitParamDecl::Other);
4828     CGF.EmitVarDecl(*PD);
4829     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4830     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4831                                               /*isSigned=*/false);
4832   } else {
4833     KmpDependInfoArrayTy = C.getConstantArrayType(
4834         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4835         ArrayType::Normal, /*IndexTypeQuals=*/0);
4836     DependenciesArray =
4837         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4838     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4839     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4840                                            /*isSigned=*/false);
4841   }
4842   unsigned Pos = 0;
4843   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4844     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4845         Dependencies[I].IteratorExpr)
4846       continue;
4847     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4848                    DependenciesArray);
4849   }
4850   // Copy regular dependecies with iterators.
4851   LValue PosLVal = CGF.MakeAddrLValue(
4852       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4853   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4854   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4855     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4856         !Dependencies[I].IteratorExpr)
4857       continue;
4858     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4859                    DependenciesArray);
4860   }
4861   // Copy final depobj arrays without iterators.
4862   if (HasDepobjDeps) {
4863     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4864       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4865         continue;
4866       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4867                          DependenciesArray);
4868     }
4869   }
4870   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4871       DependenciesArray, CGF.VoidPtrTy);
4872   return std::make_pair(NumOfElements, DependenciesArray);
4873 }
4874 
/// Emits the dependency array for an 'omp depobj' construct. The array is
/// heap-allocated via __kmpc_alloc with one extra leading record whose
/// base_addr field stores the number of elements (consumed later by
/// emitDepobjElements/getDepobjElements and required for depobj(x)
/// update(in)). Returns the address of the first real dependency record,
/// i.e. one element past the count record.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // With an iterator modifier the element count is the run-time product of
    // all iterator trip counts.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the leading count record; scale by the (aligned) record size to
    // get the allocation size in bytes.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Element count is known at compile time.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Fill from index 1; with an iterator the position must live in memory so
  // the generated loop can update it, otherwise a plain unsigned suffices.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return the address past the count record.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}
4957 
/// Emits code to free the array allocated for an 'omp depobj' object
/// (destroy clause). The depobj variable points one record *past* the
/// allocation start (the leading count record), so step back one element to
/// recover the pointer originally returned by __kmpc_alloc before passing it
/// to __kmpc_free.
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  // Step back over the count record to the real allocation base.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}
4984 
/// Emits code for the depobj update clause: rewrites the flags field of every
/// kmp_depend_info record in the depobj's array to the new dependency kind.
/// The element count and base address are recovered via getDepobjElements.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop: iterate over the records
  // with a PHI-based cursor until the end pointer is reached.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5030 
/// Emits code for an 'omp task' directive: allocates/initializes the task via
/// emitTaskInit, emits the dependency array (if any), and then either
/// enqueues the task (__kmpc_omp_task[_with_deps]) or, under a false 'if'
/// clause, executes it immediately and serially via the
/// __kmpc_omp_task_begin_if0/__kmpc_omp_task_complete_if0 protocol.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'then' branch of the 'if' clause (or the unconditional path): enqueue the
  // task with the runtime.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks start with part_id == 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // 'else' branch: if(false) - run the task body immediately in the current
  // thread, bracketed by the if0 begin/complete runtime calls.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5148 
/// Emits code for an 'omp taskloop' directive: initializes the task object,
/// fills in the loop bounds/stride and reductions fields of kmp_task_t, and
/// issues the __kmpc_taskloop runtime call.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // Unlike 'task', the 'if' clause value is passed to the runtime rather than
  // branched on in the compiler.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lb, ub and st fields of the task descriptor from the
  // precomputed loop-bound variables of the directive.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // 'sched' argument encoding for __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
5234 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr, EExpr, UpExpr Optional expressions forwarded unchanged to
/// \p RedOpGen on each iteration.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for a zero-length array.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI cursors track the current source and destination elements.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy. Temporarily rebind LHSVar/RHSVar to the current elements so
  // the generated reduction op acts on them.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5314 
5315 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5316 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5317 /// UDR combiner function.
5318 static void emitReductionCombiner(CodeGenFunction &CGF,
5319                                   const Expr *ReductionOp) {
5320   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5321     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5322       if (const auto *DRE =
5323               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5324         if (const auto *DRD =
5325                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5326           std::pair<llvm::Function *, llvm::Function *> Reduction =
5327               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5328           RValue Func = RValue::get(Reduction.first);
5329           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5330           CGF.EmitIgnoredExpr(ReductionOp);
5331           return;
5332         }
5333   CGF.EmitIgnoredExpr(ReductionOp);
5334 }
5335 
/// Emits the outlined function that combines two reduction lists element by
/// element:
/// \code
/// void reduction_func(void *LHSArg, void *RHSArg) {
///   *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
/// }
/// \endcode
/// \param ArgsType Pointer type of the reduction list (void*[n]*) both
/// arguments are cast to.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap each LHS/RHS reduction variable onto the corresponding slot of the
  // incoming reduction lists. Idx tracks the list slot and may run ahead of I
  // because VLA items occupy an extra slot for their size (see below).
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // The size is stored (as a pointer-sized integer) in the slot right
      // after the item itself; bind it to the VLA's opaque size expression so
      // EmitVariablyModifiedType can compute the concrete type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // With the variables remapped, emit one combiner per reduction item.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5427 
5428 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5429                                                   const Expr *ReductionOp,
5430                                                   const Expr *PrivateRef,
5431                                                   const DeclRefExpr *LHS,
5432                                                   const DeclRefExpr *RHS) {
5433   if (PrivateRef->getType()->isArrayType()) {
5434     // Emit reduction for array section.
5435     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5436     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5437     EmitOMPAggregateReduction(
5438         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5439         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5440           emitReductionCombiner(CGF, ReductionOp);
5441         });
5442   } else {
5443     // Emit reduction for array subscript or single variable.
5444     emitReductionCombiner(CGF, ReductionOp);
5445   }
5446 }
5447 
/// Emits code for the OpenMP 'reduction' clause: packs the private copies
/// into a void*[n] list, calls __kmpc_reduce{_nowait} and dispatches on its
/// result (1 = tree/critical reduction, 2 = atomic reduction). If
/// Options.SimpleReduction is set, only the plain combiners are emitted.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime calls needed - just emit the combiners directly.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // NOTE: this 'Size' (the VLA element count) shadows the outer 'Size'
      // (the list length) declared above.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Region body: non-atomic combiners; CommonActionTy below appends the
  // matching __kmpc_end_reduce{_nowait} call on exit.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // Try to decompose the reduction op as "X = X BO E" so it can be
      // emitted as a simple atomic update; otherwise fall back to a critical
      // region around the generic combiner.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Fallback for the non-lock-free path: materialize the old
                // value of X in a temporary mapped to VD and re-evaluate the
                // update expression against it.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5754 
5755 /// Generates unique name for artificial threadprivate variables.
5756 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5757 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5758                                       const Expr *Ref) {
5759   SmallString<256> Buffer;
5760   llvm::raw_svector_ostream Out(Buffer);
5761   const clang::DeclRefExpr *DE;
5762   const VarDecl *D = ::getBaseDecl(Ref, DE);
5763   if (!D)
5764     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5765   D = D->getCanonicalDecl();
5766   std::string Name = CGM.getOpenMPRuntime().getName(
5767       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5768   Out << Prefix << Name << "_"
5769       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5770   return std::string(Out.str());
5771 }
5772 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer). Otherwise pass a null pointer.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5841 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  // The in/out and in reduction variables referenced by the combiner
  // expression; they get remapped to the function arguments below.
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
5919 
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// Returns nullptr if the reduction item needs no cleanups.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No destructor needed - no finalizer function required.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
5968 
5969 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5970     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5971     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5972   if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5973     return nullptr;
5974 
5975   // Build typedef struct:
5976   // kmp_taskred_input {
5977   //   void *reduce_shar; // shared reduction item
5978   //   void *reduce_orig; // original reduction item used for initialization
5979   //   size_t reduce_size; // size of data item
5980   //   void *reduce_init; // data initialization routine
5981   //   void *reduce_fini; // data finalization routine
5982   //   void *reduce_comb; // data combiner routine
5983   //   kmp_task_red_flags_t flags; // flags for additional info from compiler
5984   // } kmp_taskred_input_t;
5985   ASTContext &C = CGM.getContext();
5986   RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5987   RD->startDefinition();
5988   const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5989   const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5990   const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5991   const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5992   const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5993   const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5994   const FieldDecl *FlagsFD = addFieldToRecordDecl(
5995       C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5996   RD->completeDefinition();
5997   QualType RDType = C.getRecordType(RD);
5998   unsigned Size = Data.ReductionVars.size();
5999   llvm::APInt ArraySize(/*numBits=*/64, Size);
6000   QualType ArrayRDType = C.getConstantArrayType(
6001       RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
6002   // kmp_task_red_input_t .rd_input.[Size];
6003   Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
6004   ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
6005                        Data.ReductionCopies, Data.ReductionOps);
6006   for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
6007     // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
6008     llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
6009                            llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
6010     llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
6011         TaskRedInput.getPointer(), Idxs,
6012         /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
6013         ".rd_input.gep.");
6014     LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
6015     // ElemLVal.reduce_shar = &Shareds[Cnt];
6016     LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
6017     RCG.emitSharedOrigLValue(CGF, Cnt);
6018     llvm::Value *CastedShared =
6019         CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
6020     CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
6021     // ElemLVal.reduce_orig = &Origs[Cnt];
6022     LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
6023     llvm::Value *CastedOrig =
6024         CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
6025     CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
6026     RCG.emitAggregateType(CGF, Cnt);
6027     llvm::Value *SizeValInChars;
6028     llvm::Value *SizeVal;
6029     std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6030     // We use delayed creation/initialization for VLAs and array sections. It is
6031     // required because runtime does not provide the way to pass the sizes of
6032     // VLAs/array sections to initializer/combiner/finalizer functions. Instead
6033     // threadprivate global variables are used to store these values and use
6034     // them in the functions.
6035     bool DelayedCreation = !!SizeVal;
6036     SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6037                                                /*isSigned=*/false);
6038     LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6039     CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6040     // ElemLVal.reduce_init = init;
6041     LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6042     llvm::Value *InitAddr =
6043         CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6044     CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6045     // ElemLVal.reduce_fini = fini;
6046     LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6047     llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6048     llvm::Value *FiniAddr = Fini
6049                                 ? CGF.EmitCastToVoidPtr(Fini)
6050                                 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6051     CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6052     // ElemLVal.reduce_comb = comb;
6053     LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6054     llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6055         CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6056         RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6057     CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6058     // ElemLVal.flags = 0;
6059     LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6060     if (DelayedCreation) {
6061       CGF.EmitStoreOfScalar(
6062           llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6063           FlagsLVal);
6064     } else
6065       CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
6066                                  FlagsLVal.getType());
6067   }
6068   if (Data.IsReductionWithTaskMod) {
6069     // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6070     // is_ws, int num, void *data);
6071     llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6072     llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6073                                                   CGM.IntTy, /*isSigned=*/true);
6074     llvm::Value *Args[] = {
6075         IdentTLoc, GTid,
6076         llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
6077                                /*isSigned=*/true),
6078         llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6079         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6080             TaskRedInput.getPointer(), CGM.VoidPtrTy)};
6081     return CGF.EmitRuntimeCall(
6082         OMPBuilder.getOrCreateRuntimeFunction(
6083             CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
6084         Args);
6085   }
6086   // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
6087   llvm::Value *Args[] = {
6088       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6089                                 /*isSigned=*/true),
6090       llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6091       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6092                                                       CGM.VoidPtrTy)};
6093   return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6094                                  CGM.getModule(), OMPRTL___kmpc_taskred_init),
6095                              Args);
6096 }
6097 
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
  // int gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}
6115 
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr): the runtime cannot pass VLA/array-section
  // sizes to the reduction init/combiner/finalizer functions, so the size is
  // stored in an artificial threadprivate variable that those functions read.
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}
6132 
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         // The shared item is passed as an opaque void *.
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  // Wrap the runtime's returned pointer in an Address, reusing the alignment
  // of the shared item's lvalue.
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      SharedLVal.getAlignment());
}
6152 
6153 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6154                                        SourceLocation Loc) {
6155   if (!CGF.HaveInsertPoint())
6156     return;
6157 
6158   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6159     OMPBuilder.CreateTaskwait(CGF.Builder);
6160   } else {
6161     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6162     // global_tid);
6163     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6164     // Ignore return result until untied tasks are supported.
6165     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6166                             CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6167                         Args);
6168   }
6169 
6170   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6171     Region->emitUntiedSwitch(CGF);
6172 }
6173 
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  // Nothing to emit without a valid insertion point.
  if (!CGF.HaveInsertPoint())
    return;
  // InlinedOpenMPRegionRAII installs CapturedStmtInfo for the inlined region
  // of kind InnerKind for the duration of this scope; the body is then
  // emitted through that info.
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}
6183 
namespace {
/// Cancellation kinds passed as the kmp_int32 'cncl_kind' argument of the
/// __kmpc_cancel / __kmpc_cancellationpoint runtime calls.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace
6193 
6194 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6195   RTCancelKind CancelKind = CancelNoreq;
6196   if (CancelRegion == OMPD_parallel)
6197     CancelKind = CancelParallel;
6198   else if (CancelRegion == OMPD_for)
6199     CancelKind = CancelLoop;
6200   else if (CancelRegion == OMPD_sections)
6201     CancelKind = CancelSections;
6202   else {
6203     assert(CancelRegion == OMPD_taskgroup);
6204     CancelKind = CancelTaskgroup;
6205   }
6206   return CancelKind;
6207 }
6208 
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      // A nonzero result means cancellation was requested: branch to the exit
      // block, which leaves the construct through the cleanup path.
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6245 
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Code emitted when the (optional) if-clause condition holds.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      // A nonzero result requests cancellation: branch out of the construct
      // through the cleanup path.
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // Guard the cancel call on the if-clause condition; the else branch
      // emits nothing.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6288 
namespace {
/// Cleanup action for uses_allocators support.
///
/// On region entry, every (allocator, allocator-traits) pair is initialized
/// via emitUsesAllocatorsInit; on region exit, each allocator is destroyed
/// via emitUsesAllocatorsFini.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  /// (allocator expression, allocator-traits expression) pairs collected from
  /// the uses_allocators clause.
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  /// Emit allocator initialization before the region body.
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  /// Emit allocator finalization after the region body.
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace
6316 
6317 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6318     const OMPExecutableDirective &D, StringRef ParentName,
6319     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6320     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6321   assert(!ParentName.empty() && "Invalid target region parent name!");
6322   HasEmittedTargetRegion = true;
6323   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6324   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6325     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6326       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6327       if (!D.AllocatorTraits)
6328         continue;
6329       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6330     }
6331   }
6332   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6333   CodeGen.setAction(UsesAllocatorAction);
6334   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6335                                    IsOffloadEntry, CodeGen);
6336 }
6337 
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  // Build call void *__kmpc_init_allocator(int gtid, void *memspace,
  // int ntraits, void *traits) and store the result into the allocator
  // variable from the uses_allocators clause.
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // The traits expression has constant array type; its element count is the
  // number of traits passed to the runtime.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  // Reinterpret the traits address as void ** so a single void * can be
  // loaded and handed to the runtime.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  // Emit the allocator variable itself, then convert the runtime handle to
  // the allocator's source-level type and store it.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6372 
6373 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6374                                              const Expr *Allocator) {
6375   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6376   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6377   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6378   llvm::Value *AllocatorVal =
6379       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6380   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6381                                           CGF.getContext().VoidPtrTy,
6382                                           Allocator->getExprLoc());
6383   (void)CGF.EmitRuntimeCall(
6384       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6385                                             OMPRTL___kmpc_destroy_allocator),
6386       {ThreadId, AllocatorVal});
6387 }
6388 
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the target region body into a function with the name built above.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // Host side: a unique one-byte global stands in as the region ID.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
6455 
6456 /// Checks if the expression is constant or does not have non-trivial function
6457 /// calls.
6458 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6459   // We can skip constant expressions.
6460   // We can skip expressions with trivial calls or simple expressions.
6461   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6462           !E->hasNonTrivialCall(Ctx)) &&
6463          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6464 }
6465 
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  // Peel off compound statements that contain exactly one "interesting"
  // child, skipping trivial expressions, no-op statements/directives, and
  // trivial declarations along the way. Returns nullptr if a compound has
  // more than one interesting child.
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        // Expressions without side effects or non-trivial calls are skipped.
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              // These declaration kinds are treated as non-interesting.
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              // A variable is trivial if it is constexpr, or has trivial or
              // reference type and no initializer (or a trivial one).
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6510 
6511 /// Emit the number of teams for a target directive.  Inspect the num_teams
6512 /// clause associated with a teams construct combined or closely nested
6513 /// with the target directive.
6514 ///
6515 /// Emit a team of size one for directives such as 'target parallel' that
6516 /// have no associated teams construct.
6517 ///
6518 /// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // A plain 'target': inspect the single nested directive, if there is one,
    // for a closely nested teams construct with a num_teams clause.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Evaluate the nested num_teams expression in the context of the
          // captured statement.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Teams construct without num_teams: emit 0 (unspecified).
        return Bld.getInt32(0);
      }
      // Nested parallel/simd: a single team suffices.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // No single nested directive could be determined statically.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams directives: read num_teams directly from this
    // directive's clauses, if present.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // Target directives with no teams construct run one team.
    return Bld.getInt32(1);
  // The remaining directive kinds are not target-based executable directives
  // and cannot reach here (guarded by the assert above).
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6643 
/// Check for an inner (nested) parallel region inside the captured statement
/// \p CS and compute the number of threads the host should request for it.
///
/// \param CGF Code generation state for the enclosing function.
/// \param CS Captured statement of the region being inspected.
/// \param DefaultThreadLimitVal Optional upper bound (thread_limit) already
///        computed for the enclosing construct; may be null.
/// \return
///  - i32 1 when the nested region cannot run in parallel (its 'if' clause is
///    statically false, or the nested directive is simd-only);
///  - the evaluated num_threads value (clamped to \p DefaultThreadLimitVal if
///    one is provided) when the nested directive is a parallel one;
///  - otherwise \p DefaultThreadLimitVal itself, or i32 0 (meaning "use the
///    runtime default") when no bound is known.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  // Only a directive that is the sole meaningful child of the captured
  // statement counts as "closely nested" here.
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Pick the 'if' clause that applies to the parallel region: either an
        // unmodified 'if' or one with the 'parallel' name modifier.
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Statically false condition: the region runs with one thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit any pre-init declarations the condition depends on.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  // Captured-no-init: allocate storage without running the
                  // initializer.
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Emit pre-init declarations the num_threads expression depends on.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp to the enclosing bound if one was provided:
        // min(DefaultThreadLimitVal, NumThreads).
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        // No num_threads clause: fall back to the enclosing bound, or 0 to
        // request the runtime default.
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A simd-only nested directive executes within a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6735 
/// Emit the number of threads for a target directive.  Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
///
/// \param CGF Host-side code generation state (asserted not to be a device).
/// \param D A target-based executable directive.
/// \return An i32 thread bound, i32 0 meaning "use the runtime default", or
///         nullptr when no bound can be determined.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': probe the closely nested directive (if any) for a
    // parallel region, a teams thread_limit, or a distribute/simd construct.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Evaluate a thread_limit clause on the nested directive, emitting any
      // pre-init declarations its expression depends on.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              // Captured-no-init: allocate without running the initializer.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // Step through a non-distribute teams directive to inspect its own
      // nested child.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // A non-simd distribute: look inside it for a nested parallel region.
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // simd regions execute with a single thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Look through a nested 'distribute' for a parallel region.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  // Combined constructs with an explicit parallel region: the bound is
  // min(num_threads, thread_limit), gated by any applicable 'if' clause.
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      // Select the 'if' clause that applies to the parallel region.
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false: one thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Combine with thread_limit: min(NumThreads, ThreadLimit).
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    // A dynamic 'if' condition selects between the computed bound and 1.
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  // simd-only target regions always execute with a single thread.
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // The remaining directives are not target-based and must never reach this
  // function (guarded by the assertion above).
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
6960 
6961 namespace {
6962 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6963 
6964 // Utility to handle information from clauses associated with a given
6965 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6966 // It provides a convenient interface to obtain the information and generate
6967 // code for that information.
6968 class MappableExprsHandler {
6969 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  ///
  /// NOTE(review): these values appear to mirror the libomptarget runtime's
  /// map-type bits and must stay in sync with it — confirm against the
  /// runtime's tgt_map_type definition before changing any value.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7013 
7014   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7015   static unsigned getFlagMemberOffset() {
7016     unsigned Offset = 0;
7017     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7018          Remain = Remain >> 1)
7019       Offset++;
7020     return Offset;
7021   }
7022 
7023   /// Class that associates information with a base pointer to be passed to the
7024   /// runtime library.
7025   class BasePointerInfo {
7026     /// The base pointer.
7027     llvm::Value *Ptr = nullptr;
7028     /// The base declaration that refers to this device pointer, or null if
7029     /// there is none.
7030     const ValueDecl *DevPtrDecl = nullptr;
7031 
7032   public:
7033     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7034         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7035     llvm::Value *operator*() const { return Ptr; }
7036     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7037     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7038   };
7039 
  // Convenience array types for the per-entry data handed to the runtime:
  // base pointers, raw values (section pointers / sizes), map-type flags, and
  // user-defined mapper declarations.
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;

  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types, and
  /// user-defined mappers. The five arrays are parallel: entry I of each
  /// array describes the same map entry.
  struct MapCombinedInfoTy {
    MapBaseValuesArrayTy BasePointers;
    MapValuesArrayTy Pointers;
    MapValuesArrayTy Sizes;
    MapFlagsArrayTy Types;
    MapMappersArrayTy Mappers;

    /// Append arrays in \a CurInfo.
    void append(MapCombinedInfoTy &CurInfo) {
      BasePointers.append(CurInfo.BasePointers.begin(),
                          CurInfo.BasePointers.end());
      Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
      Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
      Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
    }
  };
7065 
  /// Records, for a partially mapped struct, the range of elements which have
  /// been mapped: the lowest and highest mapped fields plus the base address.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Lowest mapped element: {field index, field address}.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest mapped element: {field index, field address}.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Base address of the struct being partially mapped.
    Address Base = Address::invalid();
  };
7077 
7078 private:
  /// Aggregates everything known about one mappable-expression item: its
  /// component list, map type and modifiers, and how a device pointer has to
  /// be returned for it.
  struct MapInfo {
    /// Components of the mappable expression (base through final element).
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// The clause's map type ('to', 'from', 'tofrom', ...).
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Map-type modifiers ('always', 'close', 'present', ...).
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// Motion modifiers from 'to'/'from' clauses ('present').
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    /// True if the runtime must return the device pointer for this entry
    /// (use_device_ptr / use_device_addr).
    bool ReturnDevicePointer = false;
    /// True if this map entry was generated implicitly rather than written
    /// by the user.
    bool IsImplicit = false;
    /// User-defined mapper associated with this entry, if any.
    const ValueDecl *Mapper = nullptr;
    /// True if the entry originates from use_device_addr (as opposed to
    /// use_device_ptr).
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), ForDeviceAddr(ForDeviceAddr) {}
  };
7103 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// The member expression the clause refers to.
    const Expr *IE = nullptr;
    /// The declaration named by the clause.
    const ValueDecl *VD = nullptr;
    /// True when the deferred entry comes from use_device_addr rather than
    /// use_device_ptr.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7116 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
7137 
  /// Compute the size in bytes, as an llvm::Value, of the entity designated
  /// by expression \p E for mapping purposes.
  ///
  /// Three forms get special treatment: array shaping expressions (element
  /// size times the product of all dimensions), references (sized as their
  /// pointee), and array sections (length times element size, with the
  /// length-less forms resolved per the OpenMP array-section rules). Anything
  /// else is sized by its canonical type.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      // Start from the pointee element size and multiply by each dimension.
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      // Element size comes from the pointee for pointer bases, or from the
      // element type for array bases.
      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      // Explicit length: Size = length * element size.
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Clamp to zero when the lower-bound offset exceeds the base size so
      // the unsigned subtraction cannot wrap.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
7212 
7213   /// Return the corresponding bits for a given map clause modifier. Add
7214   /// a flag marking the map as a pointer if requested. Add a flag marking the
7215   /// map as the first one of a series of maps that relate to the same map
7216   /// expression.
7217   OpenMPOffloadMappingFlags getMapTypeBits(
7218       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7219       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7220       bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7221     OpenMPOffloadMappingFlags Bits =
7222         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7223     switch (MapType) {
7224     case OMPC_MAP_alloc:
7225     case OMPC_MAP_release:
7226       // alloc and release is the default behavior in the runtime library,  i.e.
7227       // if we don't pass any bits alloc/release that is what the runtime is
7228       // going to do. Therefore, we don't need to signal anything for these two
7229       // type modifiers.
7230       break;
7231     case OMPC_MAP_to:
7232       Bits |= OMP_MAP_TO;
7233       break;
7234     case OMPC_MAP_from:
7235       Bits |= OMP_MAP_FROM;
7236       break;
7237     case OMPC_MAP_tofrom:
7238       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7239       break;
7240     case OMPC_MAP_delete:
7241       Bits |= OMP_MAP_DELETE;
7242       break;
7243     case OMPC_MAP_unknown:
7244       llvm_unreachable("Unexpected map type!");
7245     }
7246     if (AddPtrFlag)
7247       Bits |= OMP_MAP_PTR_AND_OBJ;
7248     if (AddIsTargetParamFlag)
7249       Bits |= OMP_MAP_TARGET_PARAM;
7250     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7251         != MapModifiers.end())
7252       Bits |= OMP_MAP_ALWAYS;
7253     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7254         != MapModifiers.end())
7255       Bits |= OMP_MAP_CLOSE;
7256     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present)
7257         != MapModifiers.end())
7258       Bits |= OMP_MAP_PRESENT;
7259     if (llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present)
7260         != MotionModifiers.end())
7261       Bits |= OMP_MAP_PRESENT;
7262     return Bits;
7263   }
7264 
7265   /// Return true if the provided expression is a final array section. A
7266   /// final array section, is one whose length can't be proved to be one.
7267   bool isFinalArraySectionExpression(const Expr *E) const {
7268     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7269 
7270     // It is not an array section and therefore not a unity-size one.
7271     if (!OASE)
7272       return false;
7273 
7274     // An array section with no colon always refer to a single element.
7275     if (OASE->getColonLocFirst().isInvalid())
7276       return false;
7277 
7278     const Expr *Length = OASE->getLength();
7279 
7280     // If we don't have a length we have to check if the array has size 1
7281     // for this dimension. Also, we should always expect a length if the
7282     // base type is pointer.
7283     if (!Length) {
7284       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7285                              OASE->getBase()->IgnoreParenImpCasts())
7286                              .getCanonicalType();
7287       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7288         return ATy->getSize().getSExtValue() != 1;
7289       // If we don't have a constant dimension length, we have to consider
7290       // the current section as having any size, so it is not necessarily
7291       // unitary. If it happen to be unity size, that's user fault.
7292       return true;
7293     }
7294 
7295     // Check if the length evaluates to 1.
7296     Expr::EvalResult Result;
7297     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7298       return true; // Can have more that size 1.
7299 
7300     llvm::APSInt ConstLength = Result.Val.getInt();
7301     return ConstLength.getSExtValue() != 1;
7302   }
7303 
7304   /// Generate the base pointers, section pointers, sizes, map type bits, and
7305   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7306   /// map type, map or motion modifiers, and expression components.
7307   /// \a IsFirstComponent should be set to true if the provided set of
7308   /// components is the first associated with a capture.
7309   void generateInfoForComponentList(
7310       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7311       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7312       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7313       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7314       bool IsFirstComponentList, bool IsImplicit,
7315       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7316       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7317           OverlappedElements = llvm::None) const {
7318     // The following summarizes what has to be generated for each map and the
7319     // types below. The generated information is expressed in this order:
7320     // base pointer, section pointer, size, flags
7321     // (to add to the ones that come from the map type and modifier).
7322     //
7323     // double d;
7324     // int i[100];
7325     // float *p;
7326     //
7327     // struct S1 {
7328     //   int i;
7329     //   float f[50];
7330     // }
7331     // struct S2 {
7332     //   int i;
7333     //   float f[50];
7334     //   S1 s;
7335     //   double *p;
7336     //   struct S2 *ps;
7337     // }
7338     // S2 s;
7339     // S2 *ps;
7340     //
7341     // map(d)
7342     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7343     //
7344     // map(i)
7345     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7346     //
7347     // map(i[1:23])
7348     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7349     //
7350     // map(p)
7351     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7352     //
7353     // map(p[1:24])
7354     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7355     // in unified shared memory mode or for local pointers
7356     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7357     //
7358     // map(s)
7359     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7360     //
7361     // map(s.i)
7362     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7363     //
7364     // map(s.s.f)
7365     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7366     //
7367     // map(s.p)
7368     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7369     //
7370     // map(to: s.p[:22])
7371     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7372     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7373     // &(s.p), &(s.p[0]), 22*sizeof(double),
7374     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7375     // (*) alloc space for struct members, only this is a target parameter
7376     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7377     //      optimizes this entry out, same in the examples below)
7378     // (***) map the pointee (map: to)
7379     //
7380     // map(s.ps)
7381     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7382     //
7383     // map(from: s.ps->s.i)
7384     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7385     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7386     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7387     //
7388     // map(to: s.ps->ps)
7389     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7390     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7391     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7392     //
7393     // map(s.ps->ps->ps)
7394     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7395     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7396     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7397     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7398     //
7399     // map(to: s.ps->ps->s.f[:22])
7400     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7401     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7402     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7403     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7404     //
7405     // map(ps)
7406     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7407     //
7408     // map(ps->i)
7409     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7410     //
7411     // map(ps->s.f)
7412     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7413     //
7414     // map(from: ps->p)
7415     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7416     //
7417     // map(to: ps->p[:22])
7418     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7419     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7420     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7421     //
7422     // map(ps->ps)
7423     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7424     //
7425     // map(from: ps->ps->s.i)
7426     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7427     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7428     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7429     //
7430     // map(from: ps->ps->ps)
7431     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7432     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7433     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7434     //
7435     // map(ps->ps->ps->ps)
7436     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7437     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7438     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7439     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7440     //
7441     // map(to: ps->ps->ps->s.f[:22])
7442     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7443     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7444     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7445     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7446     //
7447     // map(to: s.f[:22]) map(from: s.p[:33])
7448     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7449     //     sizeof(double*) (**), TARGET_PARAM
7450     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7451     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7452     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7453     // (*) allocate contiguous space needed to fit all mapped members even if
7454     //     we allocate space for members not mapped (in this example,
7455     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7456     //     them as well because they fall between &s.f[0] and &s.p)
7457     //
7458     // map(from: s.f[:22]) map(to: ps->p[:33])
7459     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7460     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7461     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7462     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7463     // (*) the struct this entry pertains to is the 2nd element in the list of
7464     //     arguments, hence MEMBER_OF(2)
7465     //
7466     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7467     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7468     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7469     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7470     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7471     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7472     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7473     // (*) the struct this entry pertains to is the 4th element in the list
7474     //     of arguments, hence MEMBER_OF(4)
7475 
7476     // Track if the map information being generated is the first for a capture.
7477     bool IsCaptureFirstInfo = IsFirstComponentList;
7478     // When the variable is on a declare target link or in a to clause with
7479     // unified memory, a reference is needed to hold the host/device address
7480     // of the variable.
7481     bool RequiresReference = false;
7482 
7483     // Scan the components from the base to the complete expression.
7484     auto CI = Components.rbegin();
7485     auto CE = Components.rend();
7486     auto I = CI;
7487 
7488     // Track if the map information being generated is the first for a list of
7489     // components.
7490     bool IsExpressionFirstInfo = true;
7491     bool FirstPointerInComplexData = false;
7492     Address BP = Address::invalid();
7493     const Expr *AssocExpr = I->getAssociatedExpression();
7494     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7495     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7496     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7497 
7498     if (isa<MemberExpr>(AssocExpr)) {
7499       // The base is the 'this' pointer. The content of the pointer is going
7500       // to be the base of the field being mapped.
7501       BP = CGF.LoadCXXThisAddress();
7502     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7503                (OASE &&
7504                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7505       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7506     } else if (OAShE &&
7507                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7508       BP = Address(
7509           CGF.EmitScalarExpr(OAShE->getBase()),
7510           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7511     } else {
7512       // The base is the reference to the variable.
7513       // BP = &Var.
7514       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7515       if (const auto *VD =
7516               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7517         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7518                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7519           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7520               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7521                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7522             RequiresReference = true;
7523             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7524           }
7525         }
7526       }
7527 
7528       // If the variable is a pointer and is being dereferenced (i.e. is not
7529       // the last component), the base has to be the pointer itself, not its
7530       // reference. References are ignored for mapping purposes.
7531       QualType Ty =
7532           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7533       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7534         // No need to generate individual map information for the pointer, it
7535         // can be associated with the combined storage if shared memory mode is
7536         // active or the base declaration is not global variable.
7537         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7538         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7539             !VD || VD->hasLocalStorage())
7540           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7541         else
7542           FirstPointerInComplexData = true;
7543         ++I;
7544       }
7545     }
7546 
7547     // Track whether a component of the list should be marked as MEMBER_OF some
7548     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7549     // in a component list should be marked as MEMBER_OF, all subsequent entries
7550     // do not belong to the base struct. E.g.
7551     // struct S2 s;
7552     // s.ps->ps->ps->f[:]
7553     //   (1) (2) (3) (4)
7554     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7555     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7556     // is the pointee of ps(2) which is not member of struct s, so it should not
7557     // be marked as such (it is still PTR_AND_OBJ).
7558     // The variable is initialized to false so that PTR_AND_OBJ entries which
7559     // are not struct members are not considered (e.g. array of pointers to
7560     // data).
7561     bool ShouldBeMemberOf = false;
7562 
7563     // Variable keeping track of whether or not we have encountered a component
7564     // in the component list which is a member expression. Useful when we have a
7565     // pointer or a final array section, in which case it is the previous
7566     // component in the list which tells us whether we have a member expression.
7567     // E.g. X.f[:]
7568     // While processing the final array section "[:]" it is "f" which tells us
7569     // whether we are dealing with a member of a declared struct.
7570     const MemberExpr *EncounteredME = nullptr;
7571 
7572     for (; I != CE; ++I) {
7573       // If the current component is member of a struct (parent struct) mark it.
7574       if (!EncounteredME) {
7575         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7576         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7577         // as MEMBER_OF the parent struct.
7578         if (EncounteredME) {
7579           ShouldBeMemberOf = true;
7580           // Do not emit as complex pointer if this is actually not array-like
7581           // expression.
7582           if (FirstPointerInComplexData) {
7583             QualType Ty = std::prev(I)
7584                               ->getAssociatedDeclaration()
7585                               ->getType()
7586                               .getNonReferenceType();
7587             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7588             FirstPointerInComplexData = false;
7589           }
7590         }
7591       }
7592 
7593       auto Next = std::next(I);
7594 
7595       // We need to generate the addresses and sizes if this is the last
7596       // component, if the component is a pointer or if it is an array section
7597       // whose length can't be proved to be one. If this is a pointer, it
7598       // becomes the base address for the following components.
7599 
7600       // A final array section, is one whose length can't be proved to be one.
7601       bool IsFinalArraySection =
7602           isFinalArraySectionExpression(I->getAssociatedExpression());
7603 
7604       // Get information on whether the element is a pointer. Have to do a
7605       // special treatment for array sections given that they are built-in
7606       // types.
7607       const auto *OASE =
7608           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7609       const auto *OAShE =
7610           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7611       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7612       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7613       bool IsPointer =
7614           OAShE ||
7615           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7616                        .getCanonicalType()
7617                        ->isAnyPointerType()) ||
7618           I->getAssociatedExpression()->getType()->isAnyPointerType();
7619       bool IsNonDerefPointer = IsPointer && !UO && !BO;
7620 
7621       if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
7622         // If this is not the last component, we expect the pointer to be
7623         // associated with an array expression or member expression.
7624         assert((Next == CE ||
7625                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7626                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7627                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7628                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7629                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7630                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7631                "Unexpected expression");
7632 
7633         Address LB = Address::invalid();
7634         if (OAShE) {
7635           LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
7636                        CGF.getContext().getTypeAlignInChars(
7637                            OAShE->getBase()->getType()));
7638         } else {
7639           LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7640                    .getAddress(CGF);
7641         }
7642 
7643         // If this component is a pointer inside the base struct then we don't
7644         // need to create any entry for it - it will be combined with the object
7645         // it is pointing to into a single PTR_AND_OBJ entry.
7646         bool IsMemberPointerOrAddr =
7647             (IsPointer || ForDeviceAddr) && EncounteredME &&
7648             (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7649              EncounteredME);
7650         if (!OverlappedElements.empty()) {
7651           // Handle base element with the info for overlapped elements.
7652           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7653           assert(Next == CE &&
7654                  "Expected last element for the overlapped elements.");
7655           assert(!IsPointer &&
7656                  "Unexpected base element with the pointer type.");
7657           // Mark the whole struct as the struct that requires allocation on the
7658           // device.
7659           PartialStruct.LowestElem = {0, LB};
7660           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7661               I->getAssociatedExpression()->getType());
7662           Address HB = CGF.Builder.CreateConstGEP(
7663               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
7664                                                               CGF.VoidPtrTy),
7665               TypeSize.getQuantity() - 1);
7666           PartialStruct.HighestElem = {
7667               std::numeric_limits<decltype(
7668                   PartialStruct.HighestElem.first)>::max(),
7669               HB};
7670           PartialStruct.Base = BP;
7671           // Emit data for non-overlapped data.
7672           OpenMPOffloadMappingFlags Flags =
7673               OMP_MAP_MEMBER_OF |
7674               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7675                              /*AddPtrFlag=*/false,
7676                              /*AddIsTargetParamFlag=*/false);
7677           LB = BP;
7678           llvm::Value *Size = nullptr;
7679           // Do bitcopy of all non-overlapped structure elements.
7680           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7681                    Component : OverlappedElements) {
7682             Address ComponentLB = Address::invalid();
7683             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7684                  Component) {
7685               if (MC.getAssociatedDeclaration()) {
7686                 ComponentLB =
7687                     CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7688                         .getAddress(CGF);
7689                 Size = CGF.Builder.CreatePtrDiff(
7690                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7691                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7692                 break;
7693               }
7694             }
7695             CombinedInfo.BasePointers.push_back(BP.getPointer());
7696             CombinedInfo.Pointers.push_back(LB.getPointer());
7697             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7698                 Size, CGF.Int64Ty, /*isSigned=*/true));
7699             CombinedInfo.Types.push_back(Flags);
7700             CombinedInfo.Mappers.push_back(nullptr);
7701             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7702           }
7703           CombinedInfo.BasePointers.push_back(BP.getPointer());
7704           CombinedInfo.Pointers.push_back(LB.getPointer());
7705           Size = CGF.Builder.CreatePtrDiff(
7706               CGF.EmitCastToVoidPtr(
7707                   CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7708               CGF.EmitCastToVoidPtr(LB.getPointer()));
7709           CombinedInfo.Sizes.push_back(
7710               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7711           CombinedInfo.Types.push_back(Flags);
7712           CombinedInfo.Mappers.push_back(nullptr);
7713           break;
7714         }
7715         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7716         if (!IsMemberPointerOrAddr) {
7717           CombinedInfo.BasePointers.push_back(BP.getPointer());
7718           CombinedInfo.Pointers.push_back(LB.getPointer());
7719           CombinedInfo.Sizes.push_back(
7720               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7721 
7722           // If Mapper is valid, the last component inherits the mapper.
7723           bool HasMapper = Mapper && Next == CE;
7724           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7725 
7726           // We need to add a pointer flag for each map that comes from the
7727           // same expression except for the first one. We also need to signal
7728           // this map is the first one that relates with the current capture
7729           // (there is a set of entries for each capture).
7730           OpenMPOffloadMappingFlags Flags =
7731               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7732                              !IsExpressionFirstInfo || RequiresReference ||
7733                                  FirstPointerInComplexData,
7734                              IsCaptureFirstInfo && !RequiresReference);
7735 
7736           if (!IsExpressionFirstInfo) {
7737             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7738             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7739             if (IsPointer)
7740               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7741                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
7742 
7743             if (ShouldBeMemberOf) {
7744               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7745               // should be later updated with the correct value of MEMBER_OF.
7746               Flags |= OMP_MAP_MEMBER_OF;
7747               // From now on, all subsequent PTR_AND_OBJ entries should not be
7748               // marked as MEMBER_OF.
7749               ShouldBeMemberOf = false;
7750             }
7751           }
7752 
7753           CombinedInfo.Types.push_back(Flags);
7754         }
7755 
7756         // If we have encountered a member expression so far, keep track of the
7757         // mapped member. If the parent is "*this", then the value declaration
7758         // is nullptr.
7759         if (EncounteredME) {
7760           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7761           unsigned FieldIndex = FD->getFieldIndex();
7762 
7763           // Update info about the lowest and highest elements for this struct
7764           if (!PartialStruct.Base.isValid()) {
7765             PartialStruct.LowestElem = {FieldIndex, LB};
7766             if (IsFinalArraySection) {
7767               Address HB =
7768                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7769                       .getAddress(CGF);
7770               PartialStruct.HighestElem = {FieldIndex, HB};
7771             } else {
7772               PartialStruct.HighestElem = {FieldIndex, LB};
7773             }
7774             PartialStruct.Base = BP;
7775           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7776             PartialStruct.LowestElem = {FieldIndex, LB};
7777           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7778             PartialStruct.HighestElem = {FieldIndex, LB};
7779           }
7780         }
7781 
7782         // If we have a final array section, we are done with this expression.
7783         if (IsFinalArraySection)
7784           break;
7785 
7786         // The pointer becomes the base for the next element.
7787         if (Next != CE)
7788           BP = LB;
7789 
7790         IsExpressionFirstInfo = false;
7791         IsCaptureFirstInfo = false;
7792         FirstPointerInComplexData = false;
7793       }
7794     }
7795   }
7796 
7797   /// Return the adjusted map modifiers if the declaration a capture refers to
7798   /// appears in a first-private clause. This is expected to be used only with
7799   /// directives that start with 'target'.
7800   MappableExprsHandler::OpenMPOffloadMappingFlags
7801   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7802     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7803 
7804     // A first private variable captured by reference will use only the
7805     // 'private ptr' and 'map to' flag. Return the right flags if the captured
7806     // declaration is known as first-private in this handler.
7807     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7808       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7809           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7810         return MappableExprsHandler::OMP_MAP_ALWAYS |
7811                MappableExprsHandler::OMP_MAP_TO;
7812       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7813         return MappableExprsHandler::OMP_MAP_TO |
7814                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7815       return MappableExprsHandler::OMP_MAP_PRIVATE |
7816              MappableExprsHandler::OMP_MAP_TO;
7817     }
7818     return MappableExprsHandler::OMP_MAP_TO |
7819            MappableExprsHandler::OMP_MAP_FROM;
7820   }
7821 
7822   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7823     // Rotate by getFlagMemberOffset() bits.
7824     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7825                                                   << getFlagMemberOffset());
7826   }
7827 
7828   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7829                                      OpenMPOffloadMappingFlags MemberOfFlag) {
7830     // If the entry is PTR_AND_OBJ but has not been marked with the special
7831     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7832     // marked as MEMBER_OF.
7833     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7834         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7835       return;
7836 
7837     // Reset the placeholder value to prepare the flag for the assignment of the
7838     // proper MEMBER_OF value.
7839     Flags &= ~OMP_MAP_MEMBER_OF;
7840     Flags |= MemberOfFlag;
7841   }
7842 
7843   void getPlainLayout(const CXXRecordDecl *RD,
7844                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7845                       bool AsBase) const {
7846     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7847 
7848     llvm::StructType *St =
7849         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7850 
7851     unsigned NumElements = St->getNumElements();
7852     llvm::SmallVector<
7853         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7854         RecordLayout(NumElements);
7855 
7856     // Fill bases.
7857     for (const auto &I : RD->bases()) {
7858       if (I.isVirtual())
7859         continue;
7860       const auto *Base = I.getType()->getAsCXXRecordDecl();
7861       // Ignore empty bases.
7862       if (Base->isEmpty() || CGF.getContext()
7863                                  .getASTRecordLayout(Base)
7864                                  .getNonVirtualSize()
7865                                  .isZero())
7866         continue;
7867 
7868       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7869       RecordLayout[FieldIndex] = Base;
7870     }
7871     // Fill in virtual bases.
7872     for (const auto &I : RD->vbases()) {
7873       const auto *Base = I.getType()->getAsCXXRecordDecl();
7874       // Ignore empty bases.
7875       if (Base->isEmpty())
7876         continue;
7877       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7878       if (RecordLayout[FieldIndex])
7879         continue;
7880       RecordLayout[FieldIndex] = Base;
7881     }
7882     // Fill in all the fields.
7883     assert(!RD->isUnion() && "Unexpected union.");
7884     for (const auto *Field : RD->fields()) {
7885       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7886       // will fill in later.)
7887       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7888         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7889         RecordLayout[FieldIndex] = Field;
7890       }
7891     }
7892     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7893              &Data : RecordLayout) {
7894       if (Data.isNull())
7895         continue;
7896       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7897         getPlainLayout(Base, Layout, /*AsBase=*/true);
7898       else
7899         Layout.push_back(Data.get<const FieldDecl *>());
7900     }
7901   }
7902 
7903 public:
7904   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7905       : CurDir(&Dir), CGF(CGF) {
7906     // Extract firstprivate clause information.
7907     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7908       for (const auto *D : C->varlists())
7909         FirstPrivateDecls.try_emplace(
7910             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
7911     // Extract implicit firstprivates from uses_allocators clauses.
7912     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
7913       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
7914         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
7915         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
7916           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
7917                                         /*Implicit=*/true);
7918         else if (const auto *VD = dyn_cast<VarDecl>(
7919                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
7920                          ->getDecl()))
7921           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
7922       }
7923     }
7924     // Extract device pointer clause information.
7925     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7926       for (auto L : C->component_lists())
7927         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
7928   }
7929 
  /// Constructor for the declare mapper directive. Unlike the executable
  /// directive constructor above, no clause information is gathered here;
  /// only the current directive context is recorded.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
7933 
7934   /// Generate code for the combined entry if we have a partially mapped struct
7935   /// and take care of the mapping flags of the arguments corresponding to
7936   /// individual struct members.
7937   void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
7938                          MapFlagsArrayTy &CurTypes,
7939                          const StructRangeInfoTy &PartialStruct,
7940                          bool NotTargetParams = false) const {
7941     // Base is the base of the struct
7942     CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
7943     // Pointer is the address of the lowest element
7944     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
7945     CombinedInfo.Pointers.push_back(LB);
7946     // There should not be a mapper for a combined entry.
7947     CombinedInfo.Mappers.push_back(nullptr);
7948     // Size is (addr of {highest+1} element) - (addr of lowest element)
7949     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
7950     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
7951     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
7952     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
7953     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
7954     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
7955                                                   /*isSigned=*/false);
7956     CombinedInfo.Sizes.push_back(Size);
7957     // Map type is always TARGET_PARAM, if generate info for captures.
7958     CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
7959                                                  : OMP_MAP_TARGET_PARAM);
7960     // If any element has the present modifier, then make sure the runtime
7961     // doesn't attempt to allocate the struct.
7962     if (CurTypes.end() !=
7963         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
7964           return Type & OMP_MAP_PRESENT;
7965         }))
7966       CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
7967     // Remove TARGET_PARAM flag from the first element
7968     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
7969 
7970     // All other current entries will be MEMBER_OF the combined entry
7971     // (except for PTR_AND_OBJ entries which do not have a placeholder value
7972     // 0xFFFF in the MEMBER_OF field).
7973     OpenMPOffloadMappingFlags MemberOfFlag =
7974         getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
7975     for (auto &M : CurTypes)
7976       setCorrectMemberOfFlag(M, MemberOfFlag);
7977   }
7978 
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo, bool NotTargetParams = false,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses. Declarations in \p SkipVarSet are ignored entirely.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            bool ForDeviceAddr = false) {
          // Key the map by the canonical declaration; 'this' components are
          // keyed by nullptr.
          const ValueDecl *VD =
              D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
          if (SkipVarSet.count(VD))
            return;
          Info[VD].emplace_back(L, MapType, MapModifiers, MotionModifiers,
                                ReturnDevicePointer, IsImplicit, Mapper,
                                ForDeviceAddr);
        };

    // Only executable directives reach this point; declare-mapper directives
    // are handled by generateAllInfoForMapper instead.
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L));
      }
    // 'to' and 'from' motion clauses are folded into the same table using the
    // corresponding map types.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_to, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L));
      }
    for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_from, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L));
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDevicePtrCombinedInfo;

    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          // Exclude cases where the base pointer is mapped as array subscript,
          // array section or array shaping. The base address is passed as a
          // pointer to base in this case and cannot be used as a base for
          // use_device_ptr list item.
          if (CI != It->second.end()) {
            auto PrevCI = std::next(CI->Components.rbegin());
            const auto *VarD = dyn_cast<VarDecl>(VD);
            if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                isa<MemberExpr>(IE) ||
                !VD->getType().getNonReferenceType()->isPointerType() ||
                PrevCI == CI->Components.rend() ||
                isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                VarD->hasLocalStorage()) {
              CI->ReturnDevicePointer = true;
              continue;
            }
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Components, OMPC_MAP_unknown, llvm::None, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit(), nullptr);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
        } else {
          // No existing map: load the pointer's current (host) value and emit
          // a zero-sized RETURN_PARAM entry. These entries are collected in
          // UseDevicePtrCombinedInfo and appended at the very end.
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
          UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
          UseDevicePtrCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDevicePtrCombinedInfo.Types.push_back(
              OMP_MAP_RETURN_PARAM |
              (NotTargetParams ? OMP_MAP_NONE : OMP_MAP_TARGET_PARAM));
          UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Look at the use_device_addr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_addr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) {
      for (const auto L : C->component_lists()) {
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr, /*ForDeviceAddr=*/true);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
        } else {
          // For use_device_addr we need the address of the expression itself
          // (or its value for rvalues), not the loaded pointee. Unlike the
          // use_device_ptr case above, these entries go straight into
          // CombinedInfo.
          llvm::Value *Ptr;
          if (IE->isGLValue())
            Ptr = CGF.EmitLValue(IE).getPointer(CGF);
          else
            Ptr = CGF.EmitScalarExpr(IE);
          CombinedInfo.BasePointers.emplace_back(Ptr, VD);
          CombinedInfo.Pointers.push_back(Ptr);
          CombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          CombinedInfo.Types.push_back(
              OMP_MAP_RETURN_PARAM |
              (NotTargetParams ? OMP_MAP_NONE : OMP_MAP_TARGET_PARAM));
          CombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Generate the map information one declaration at a time, accumulating
    // each declaration's entries in CurInfo before appending them.
    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = !NotTargetParams;

      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
        generateInfoForComponentList(L.MapType, L.MapModifiers,
                                     L.MotionModifiers, L.Components, CurInfo,
                                     PartialStruct, IsFirstComponentList,
                                     L.IsImplicit, L.Mapper, L.ForDeviceAddr);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (L.ReturnDevicePointer) {
          assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
              RelevantVD);
          CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(M.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
            // value MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                                    OMP_MAP_MEMBER_OF);
          }
          CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct,
                          NotTargetParams);

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDevicePtrCombinedInfo);
  }
8270 
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted map clauses of user-defined mapper (all included
  /// in \a CombinedInfo).
  void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
    // This is the declare-mapper counterpart of generateAllInfo.
    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
           "Expect a declare mapper directive");
    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Fill the information map for map clauses (the clause list of a declare
    // mapper consists of map clauses only, hence the unchecked cast).
    for (const auto *C : CurMapperDir->clauselists()) {
      const auto *MC = cast<OMPMapClause>(C);
      for (const auto L : MC->component_lists()) {
        // Key by the canonical declaration; 'this' components use nullptr.
        const ValueDecl *VD =
            std::get<0>(L) ? cast<ValueDecl>(std::get<0>(L)->getCanonicalDecl())
                           : nullptr;
        // Get the corresponding user-defined mapper.
        Info[VD].emplace_back(std::get<1>(L), MC->getMapType(),
                              MC->getMapTypeModifiers(), llvm::None,
                              /*ReturnDevicePointer=*/false, MC->isImplicit(),
                              std::get<2>(L));
      }
    }

    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");
        generateInfoForComponentList(L.MapType, L.MapModifiers,
                                     L.MotionModifiers, L.Components, CurInfo,
                                     PartialStruct, IsFirstComponentList,
                                     L.IsImplicit, L.Mapper, L.ForDeviceAddr);
        IsFirstComponentList = false;
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct);

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
  }
8326 
  /// Emit capture info for lambdas for variables captured by reference.
  /// \p Arg is the kernel argument holding the lambda closure object; one map
  /// entry is generated for the captured 'this' (if any) and for each by-ref
  /// or pointer-typed capture.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    // Only lambda closure objects are handled here.
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      // Map the captured 'this': base is the field inside the closure,
      // pointee is the field's lvalue, size is that of a void pointer.
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      // Record the closure base so adjustMemberOfForLambdaCaptures can later
      // patch the MEMBER_OF index of this entry.
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and pointer-typed by-copy captures need a
      // map entry.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-ref capture: map the referenced object with its full type size.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // By-copy pointer capture: map the loaded pointer value with zero
        // size (the pointee is not mapped here).
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }
8390 
8391   /// Set correct indices for lambdas captures.
8392   void adjustMemberOfForLambdaCaptures(
8393       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8394       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8395       MapFlagsArrayTy &Types) const {
8396     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8397       // Set correct member_of idx for all implicit lambda captures.
8398       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8399                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8400         continue;
8401       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8402       assert(BasePtr && "Unable to find base lambda address.");
8403       int TgtIdx = -1;
8404       for (unsigned J = I; J > 0; --J) {
8405         unsigned Idx = J - 1;
8406         if (Pointers[Idx] != BasePtr)
8407           continue;
8408         TgtIdx = Idx;
8409         break;
8410       }
8411       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8412       // All other current entries will be MEMBER_OF the combined entry
8413       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8414       // 0xFFFF in the MEMBER_OF field).
8415       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8416       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8417     }
8418   }
8419 
  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // 'this' captures are represented by a null declaration; everything else
    // uses the captured variable's canonical declaration.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    // Collect every map-clause component list that refers to this declaration,
    // together with its map type, modifiers, implicitness and mapper.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper);
      }
    }

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L;
      ++Count;
      // Compare L against every list that follows it (each pair is examined
      // once).
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        // Only Components1 is needed here; the remaining fields are
        // scratch-overwritten and re-read from L on the next outer iteration.
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper) = L1;
        // Walk both component lists from the base outwards as long as they
        // agree on expression kind and referenced declaration.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          // The exhausted (shorter/prefix) list is the base; the other list is
          // the overlapped sub-region.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Capture the record's field order so fields from different (sub)classes
      // can be compared by layout position below.
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            // Skip the common prefix of the two component lists.
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Otherwise order by field position: same parent record compares
            // by field index, different records by plain-layout order.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper);
      IsFirstComponentList = false;
    }
  }
8600 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Appends exactly one entry (base pointer, pointer, size, map type, and a
  /// null mapper) to \a CombinedInfo. The flags chosen depend on how the
  /// variable is captured: 'this', by-copy, or by-reference.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this' capture: map the pointee object with the size of the record
      // it points to, both to and from the device.
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // If the variable appeared in a firstprivate clause, use the
      // implicit/explicit flag recorded when that clause was processed.
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        // A constant firstprivate variable is registered as a global copy
        // with the runtime; map that copy instead of the original variable.
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CombinedInfo.Sizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CombinedInfo.BasePointers.push_back(Addr);
        CombinedInfo.Pointers.push_back(Addr);
      } else {
        CombinedInfo.BasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          // Firstprivate pointer captured by reference: load through the
          // reference so the entry's pointer is the pointer's value.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
        } else {
          CombinedInfo.Pointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
8684 };
8685 } // anonymous namespace
8686 
8687 /// Emit the arrays used to pass the captures and map information to the
8688 /// offloading runtime library. If there is no map or capture information,
8689 /// return nullptr by reference.
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// On return \a Info points at the freshly created .offload_baseptrs,
/// .offload_ptrs, .offload_sizes, .offload_mappers, and offload_maptypes
/// arrays, populated from the entries in \a CombinedInfo.
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : CombinedInfo.Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // Base pointers, pointers, and mappers are always stored at runtime, so
    // they live in function-local temporaries.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (llvm::Value *S : CombinedInfo.Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      // The array's address is not significant, so identical constants may
      // be merged.
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      // Strip OMP_MAP_PRESENT from every entry; only emit a second array if
      // that actually changed something.
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayInit =
            llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
        MaptypesName = CGM.getOpenMPRuntime().getName({"offload_maptypes"});
        MapTypesArrayGbl = new llvm::GlobalVariable(
            CGM.getModule(), MapTypesArrayInit->getType(),
            /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
            MapTypesArrayInit, MaptypesName);
        MapTypesArrayGbl->setUnnamedAddr(
            llvm::GlobalValue::UnnamedAddr::Global);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Store every base pointer, pointer, (runtime-evaluated) size, and mapper
    // into its slot of the corresponding array.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      // Cast the slot to the stored value's own pointer type before storing.
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // If requested, remember the slot holding this declaration's device
      // address so it can be retrieved later.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Sizes are only stored at runtime when the constant-array path above
      // could not be used.
      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }
}
8842 
8843 /// Emit the arguments to be passed to the runtime library based on the
8844 /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
8845 /// ForEndCall, emit map types to be passed for the end of the region instead of
8846 /// the beginning.
/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers.  If
/// ForEndCall, emit map types to be passed for the end of the region instead of
/// the beginning.
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, llvm::Value *&MappersArrayArg,
    CGOpenMPRuntime::TargetDataInfo &Info, bool ForEndCall = false) {
  assert((!ForEndCall || Info.separateBeginEndCalls()) &&
         "expected region end call to runtime only when end call is separate")                                                                      ;
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    // Decay each array recorded in Info to a pointer to its first element.
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    // Use the end-of-region map type array when requested and it exists (it
    // only exists when a 'present' modifier had to be dropped for the end).
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
                                            : Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    // If no entry has a user-defined mapper, pass a null mappers array
    // instead of the emitted temporary.
    MappersArrayArg =
        Info.HasMapper
            ? CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy)
            : llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
  } else {
    // Nothing is mapped: pass null for every array argument.
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
  }
}
8887 
/// Check for inner distribute directive.
///
/// Returns the distribute directive nested inside \a D, looking through at
/// most one intervening 'teams' region when \a D is a plain 'target', or
/// nullptr if no such nested directive is found. Only target-based directive
/// kinds are expected here; anything else trips the unreachable below.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  // The nested directive must be the only meaningful child statement.
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // 'target' may contain 'distribute' either directly or inside a
      // nested 'teams' region; descend one more level for the latter.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      // The teams level is already part of the directive; a nested
      // distribute (if any) is the immediate child.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      // These combined target forms cannot enclose a distribute region.
      return nullptr;
    // All remaining kinds are either combined directives that already
    // contain the distribute part or are not target directives at all, so
    // this query should never be made for them.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
8994 
8995 /// Emit the user-defined mapper function. The code generation follows the
8996 /// pattern in the example below.
8997 /// \code
8998 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
8999 ///                                           void *base, void *begin,
9000 ///                                           int64_t size, int64_t type) {
9001 ///   // Allocate space for an array section first.
9002 ///   if (size > 1 && !maptype.IsDelete)
9003 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9004 ///                                 size*sizeof(Ty), clearToFrom(type));
9005 ///   // Map members.
9006 ///   for (unsigned i = 0; i < size; i++) {
9007 ///     // For each component specified by this mapper:
9008 ///     for (auto c : all_components) {
9009 ///       if (c.hasMapper())
9010 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9011 ///                       c.arg_type);
9012 ///       else
9013 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9014 ///                                     c.arg_begin, c.arg_size, c.arg_type);
9015 ///     }
9016 ///   }
9017 ///   // Delete the array section.
9018 ///   if (size > 1 && maptype.IsDelete)
9019 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9020 ///                                 size*sizeof(Ty), clearToFrom(type));
9021 /// }
9022 /// \endcode
9023 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9024                                             CodeGenFunction *CGF) {
9025   if (UDMMap.count(D) > 0)
9026     return;
9027   ASTContext &C = CGM.getContext();
9028   QualType Ty = D->getType();
9029   QualType PtrTy = C.getPointerType(Ty).withRestrict();
9030   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9031   auto *MapperVarDecl =
9032       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9033   SourceLocation Loc = D->getLocation();
9034   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9035 
9036   // Prepare mapper function arguments and attributes.
9037   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9038                               C.VoidPtrTy, ImplicitParamDecl::Other);
9039   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9040                             ImplicitParamDecl::Other);
9041   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9042                              C.VoidPtrTy, ImplicitParamDecl::Other);
9043   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9044                             ImplicitParamDecl::Other);
9045   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9046                             ImplicitParamDecl::Other);
9047   FunctionArgList Args;
9048   Args.push_back(&HandleArg);
9049   Args.push_back(&BaseArg);
9050   Args.push_back(&BeginArg);
9051   Args.push_back(&SizeArg);
9052   Args.push_back(&TypeArg);
9053   const CGFunctionInfo &FnInfo =
9054       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9055   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9056   SmallString<64> TyStr;
9057   llvm::raw_svector_ostream Out(TyStr);
9058   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
9059   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9060   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9061                                     Name, &CGM.getModule());
9062   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9063   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9064   // Start the mapper function code generation.
9065   CodeGenFunction MapperCGF(CGM);
9066   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
9068   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9069       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9070       C.getPointerType(Int64Ty), Loc);
9071   // Convert the size in bytes into the number of array elements.
9072   Size = MapperCGF.Builder.CreateExactUDiv(
9073       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9074   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9075       MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
9076       CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
9077   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
9078   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9079       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9080       C.getPointerType(Int64Ty), Loc);
9081   // Prepare common arguments for array initiation and deletion.
9082   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9083       MapperCGF.GetAddrOfLocalVar(&HandleArg),
9084       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9085   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9086       MapperCGF.GetAddrOfLocalVar(&BaseArg),
9087       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9088   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9089       MapperCGF.GetAddrOfLocalVar(&BeginArg),
9090       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9091 
9092   // Emit array initiation if this is an array section and \p MapType indicates
9093   // that memory allocation is required.
9094   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9095   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9096                              ElementSize, HeadBB, /*IsInit=*/true);
9097 
9098   // Emit a for loop to iterate through SizeArg of elements and map all of them.
9099 
9100   // Emit the loop header block.
9101   MapperCGF.EmitBlock(HeadBB);
9102   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9103   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9104   // Evaluate whether the initial condition is satisfied.
9105   llvm::Value *IsEmpty =
9106       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9107   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9108   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9109 
9110   // Emit the loop body block.
9111   MapperCGF.EmitBlock(BodyBB);
9112   llvm::BasicBlock *LastBB = BodyBB;
9113   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9114       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9115   PtrPHI->addIncoming(PtrBegin, EntryBB);
9116   Address PtrCurrent =
9117       Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
9118                           .getAlignment()
9119                           .alignmentOfArrayElement(ElementSize));
9120   // Privatize the declared variable of mapper to be the current array element.
9121   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9122   Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
9123     return MapperCGF
9124         .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
9125         .getAddress(MapperCGF);
9126   });
9127   (void)Scope.Privatize();
9128 
9129   // Get map clause information. Fill up the arrays with all mapped variables.
9130   MappableExprsHandler::MapCombinedInfoTy Info;
9131   MappableExprsHandler MEHandler(*D, MapperCGF);
9132   MEHandler.generateAllInfoForMapper(Info);
9133 
9134   // Call the runtime API __tgt_mapper_num_components to get the number of
9135   // pre-existing components.
9136   llvm::Value *OffloadingArgs[] = {Handle};
9137   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9138       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9139                                             OMPRTL___tgt_mapper_num_components),
9140       OffloadingArgs);
9141   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9142       PreviousSize,
9143       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9144 
9145   // Fill up the runtime mapper handle for all components.
9146   for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
9147     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9148         *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9149     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9150         Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9151     llvm::Value *CurSizeArg = Info.Sizes[I];
9152 
9153     // Extract the MEMBER_OF field from the map type.
9154     llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
9155     MapperCGF.EmitBlock(MemberBB);
9156     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
9157     llvm::Value *Member = MapperCGF.Builder.CreateAnd(
9158         OriMapType,
9159         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
9160     llvm::BasicBlock *MemberCombineBB =
9161         MapperCGF.createBasicBlock("omp.member.combine");
9162     llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
9163     llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
9164     MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
9165     // Add the number of pre-existing components to the MEMBER_OF field if it
9166     // is valid.
9167     MapperCGF.EmitBlock(MemberCombineBB);
9168     llvm::Value *CombinedMember =
9169         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9170     // Do nothing if it is not a member of previous components.
9171     MapperCGF.EmitBlock(TypeBB);
9172     llvm::PHINode *MemberMapType =
9173         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
9174     MemberMapType->addIncoming(OriMapType, MemberBB);
9175     MemberMapType->addIncoming(CombinedMember, MemberCombineBB);
9176 
9177     // Combine the map type inherited from user-defined mapper with that
9178     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9179     // bits of the \a MapType, which is the input argument of the mapper
9180     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9181     // bits of MemberMapType.
9182     // [OpenMP 5.0], 1.2.6. map-type decay.
9183     //        | alloc |  to   | from  | tofrom | release | delete
9184     // ----------------------------------------------------------
9185     // alloc  | alloc | alloc | alloc | alloc  | release | delete
9186     // to     | alloc |  to   | alloc |   to   | release | delete
9187     // from   | alloc | alloc | from  |  from  | release | delete
9188     // tofrom | alloc |  to   | from  | tofrom | release | delete
9189     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9190         MapType,
9191         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
9192                                    MappableExprsHandler::OMP_MAP_FROM));
9193     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9194     llvm::BasicBlock *AllocElseBB =
9195         MapperCGF.createBasicBlock("omp.type.alloc.else");
9196     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9197     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9198     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9199     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9200     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9201     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9202     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9203     MapperCGF.EmitBlock(AllocBB);
9204     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9205         MemberMapType,
9206         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9207                                      MappableExprsHandler::OMP_MAP_FROM)));
9208     MapperCGF.Builder.CreateBr(EndBB);
9209     MapperCGF.EmitBlock(AllocElseBB);
9210     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9211         LeftToFrom,
9212         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
9213     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9214     // In case of to, clear OMP_MAP_FROM.
9215     MapperCGF.EmitBlock(ToBB);
9216     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9217         MemberMapType,
9218         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
9219     MapperCGF.Builder.CreateBr(EndBB);
9220     MapperCGF.EmitBlock(ToElseBB);
9221     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9222         LeftToFrom,
9223         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
9224     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9225     // In case of from, clear OMP_MAP_TO.
9226     MapperCGF.EmitBlock(FromBB);
9227     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9228         MemberMapType,
9229         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
9230     // In case of tofrom, do nothing.
9231     MapperCGF.EmitBlock(EndBB);
9232     LastBB = EndBB;
9233     llvm::PHINode *CurMapType =
9234         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9235     CurMapType->addIncoming(AllocMapType, AllocBB);
9236     CurMapType->addIncoming(ToMapType, ToBB);
9237     CurMapType->addIncoming(FromMapType, FromBB);
9238     CurMapType->addIncoming(MemberMapType, ToElseBB);
9239 
9240     llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9241                                      CurSizeArg, CurMapType};
9242     if (Info.Mappers[I]) {
9243       // Call the corresponding mapper function.
9244       llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
9245           cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
9246       assert(MapperFunc && "Expect a valid mapper function is available.");
9247       MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
9248     } else {
9249       // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9250       // data structure.
9251       MapperCGF.EmitRuntimeCall(
9252           OMPBuilder.getOrCreateRuntimeFunction(
9253               CGM.getModule(), OMPRTL___tgt_push_mapper_component),
9254           OffloadingArgs);
9255     }
9256   }
9257 
9258   // Update the pointer to point to the next element that needs to be mapped,
9259   // and check whether we have mapped all elements.
9260   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9261       PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9262   PtrPHI->addIncoming(PtrNext, LastBB);
9263   llvm::Value *IsDone =
9264       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9265   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9266   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9267 
9268   MapperCGF.EmitBlock(ExitBB);
9269   // Emit array deletion if this is an array section and \p MapType indicates
9270   // that deletion is required.
9271   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9272                              ElementSize, DoneBB, /*IsInit=*/false);
9273 
9274   // Emit the function exit block.
9275   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9276   MapperCGF.FinishFunction();
9277   UDMMap.try_emplace(D, Fn);
9278   if (CGF) {
9279     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9280     Decls.second.push_back(D);
9281   }
9282 }
9283 
9284 /// Emit the array initialization or deletion portion for user-defined mapper
9285 /// code generation. First, it evaluates whether an array section is mapped and
9286 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9287 /// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
  // Basic-block names are suffixed with ".init" or ".del" so the two
  // expansions of this helper can be told apart in the emitted IR.
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *IsDeleteBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  // Only a section with at least one element needs init/delete handling;
  // otherwise branch straight to \p ExitBB.
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);

  // Evaluate if we are going to delete this section.
  MapperCGF.EmitBlock(IsDeleteBB);
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  if (IsInit) {
    // Initialization code runs only when deletion is NOT requested.
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    // Deletion code runs only when the delete bit IS set.
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
9339 
9340 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9341     const OMPDeclareMapperDecl *D) {
9342   auto I = UDMMap.find(D);
9343   if (I != UDMMap.end())
9344     return I->second;
9345   emitUserDefinedMapper(D);
9346   return UDMMap.lookup(D);
9347 }
9348 
9349 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9350     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9351     llvm::Value *DeviceID,
9352     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9353                                      const OMPLoopDirective &D)>
9354         SizeEmitter) {
9355   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9356   const OMPExecutableDirective *TD = &D;
9357   // Get nested teams distribute kind directive, if any.
9358   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9359     TD = getNestedDistributeDirective(CGM.getContext(), D);
9360   if (!TD)
9361     return;
9362   const auto *LD = cast<OMPLoopDirective>(TD);
9363   auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
9364                                                      PrePostActionTy &) {
9365     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9366       llvm::Value *Args[] = {DeviceID, NumIterations};
9367       CGF.EmitRuntimeCall(
9368           OMPBuilder.getOrCreateRuntimeFunction(
9369               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount),
9370           Args);
9371     }
9372   };
9373   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9374 }
9375 
/// Emit code that launches the target region \p D on a device (when an
/// outlined function ID is available) and falls back to running the host
/// version \p OutlinedFn when offloading fails, is disabled by \p IfCond, or
/// no device code was generated.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // A depend clause forces the target invocation to be wrapped in an outer
  // task; in that case the captured variables must be regenerated inside each
  // region that calls the outlined function.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Materialize the values captured by the target region.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       InputInfo.MappersArray.getPointer(),
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       InputInfo.MappersArray.getPointer()};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Wraps ThenGen: builds the offloading argument arrays for every capture
  // and map clause, then invokes ThenGen (possibly inside an outer task).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurInfo.BasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct);

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, /*NotTargetParams=*/true,
                              MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info.MappersArray, Info);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Wraps ElseGen: run the host fallback, inside an outer task if required.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
9668 
/// Recursively scan \p S for OpenMP target execution directives and emit a
/// device function for each target region found; \p ParentName is used to
/// form the unique offload entry name.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // Compute the unique source-location-based identifier for this region.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the device-function emitter for the specific target
    // directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // None of the remaining kinds is a target execution directive, so they
    // cannot satisfy the RequiresDeviceCodegen check above.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // For a non-target executable directive, nested target regions can only
  // live in its associated statement.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
9816 
9817 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9818   // If emitting code for the host, we do not process FD here. Instead we do
9819   // the normal code generation.
9820   if (!CGM.getLangOpts().OpenMPIsDevice) {
9821     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
9822       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9823           OMPDeclareTargetDeclAttr::getDeviceType(FD);
9824       // Do not emit device_type(nohost) functions for the host.
9825       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9826         return true;
9827     }
9828     return false;
9829   }
9830 
9831   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9832   // Try to detect target regions in the function.
9833   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9834     StringRef Name = CGM.getMangledName(GD);
9835     scanForTargetRegionsFunctions(FD->getBody(), Name);
9836     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9837         OMPDeclareTargetDeclAttr::getDeviceType(FD);
9838     // Do not emit device_type(nohost) functions for the host.
9839     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9840       return true;
9841   }
9842 
9843   // Do not to emit function if it is not marked as declare target.
9844   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9845          AlreadyEmittedTargetDecls.count(VD) == 0;
9846 }
9847 
/// Decide whether codegen for the global variable \p GD must be skipped or
/// deferred. Returns true when the variable should NOT be emitted through
/// the regular codegen path right now.
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  // Only relevant when compiling for an OpenMP device.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target. 'link'
  // variables (and 'to' variables under unified shared memory) are deferred;
  // they are handled later in emitDeferredTargetDecls().
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}
9881 
/// Create (or reuse) an internal global holding the value of the constant
/// firstprivate variable \p VD and register it as a device global variable
/// entry with the offload-entries manager.
/// \return the address of the internal global.
llvm::Constant *
CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
                                                const VarDecl *VD) {
  assert(VD->getType().isConstant(CGM.getContext()) &&
         "Expected constant variable.");
  StringRef VarName;
  llvm::Constant *Addr;
  llvm::GlobalValue::LinkageTypes Linkage;
  QualType Ty = VD->getType();
  SmallString<128> Buffer;
  {
    // Build a unique entry name from the device ID, file ID and source line
    // of the variable's location.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
                             FileID, Line);
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
    VarName = OS.str();
  }
  Linkage = llvm::GlobalValue::InternalLinkage;
  Addr =
      getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
                                  getDefaultFirstprivateAddressSpace());
  cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
  CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
  // Keep the global alive through optimizations even if otherwise unused.
  CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize,
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
  return Addr;
}
9915 
9916 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9917                                                    llvm::Constant *Addr) {
9918   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9919       !CGM.getLangOpts().OpenMPIsDevice)
9920     return;
9921   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9922       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9923   if (!Res) {
9924     if (CGM.getLangOpts().OpenMPIsDevice) {
9925       // Register non-target variables being emitted in device code (debug info
9926       // may cause this).
9927       StringRef VarName = CGM.getMangledName(VD);
9928       EmittedNonTargetVariables.try_emplace(VarName, Addr);
9929     }
9930     return;
9931   }
9932   // Register declare target variables.
9933   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
9934   StringRef VarName;
9935   CharUnits VarSize;
9936   llvm::GlobalValue::LinkageTypes Linkage;
9937 
9938   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9939       !HasRequiresUnifiedSharedMemory) {
9940     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9941     VarName = CGM.getMangledName(VD);
9942     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
9943       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
9944       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
9945     } else {
9946       VarSize = CharUnits::Zero();
9947     }
9948     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
9949     // Temp solution to prevent optimizations of the internal variables.
9950     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
9951       std::string RefName = getName({VarName, "ref"});
9952       if (!CGM.GetGlobalValue(RefName)) {
9953         llvm::Constant *AddrRef =
9954             getOrCreateInternalVariable(Addr->getType(), RefName);
9955         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
9956         GVAddrRef->setConstant(/*Val=*/true);
9957         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
9958         GVAddrRef->setInitializer(Addr);
9959         CGM.addCompilerUsedGlobal(GVAddrRef);
9960       }
9961     }
9962   } else {
9963     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
9964             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9965              HasRequiresUnifiedSharedMemory)) &&
9966            "Declare target attribute must link or to with unified memory.");
9967     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
9968       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
9969     else
9970       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9971 
9972     if (CGM.getLangOpts().OpenMPIsDevice) {
9973       VarName = Addr->getName();
9974       Addr = nullptr;
9975     } else {
9976       VarName = getAddrOfDeclareTargetVar(VD).getName();
9977       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
9978     }
9979     VarSize = CGM.getPointerSize();
9980     Linkage = llvm::GlobalValue::WeakAnyLinkage;
9981   }
9982 
9983   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9984       VarName, Addr, VarSize, Flags, Linkage);
9985 }
9986 
9987 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9988   if (isa<FunctionDecl>(GD.getDecl()) ||
9989       isa<OMPDeclareReductionDecl>(GD.getDecl()))
9990     return emitTargetFunctions(GD);
9991 
9992   return emitTargetGlobalVariable(GD);
9993 }
9994 
9995 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9996   for (const VarDecl *VD : DeferredGlobalVariables) {
9997     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9998         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9999     if (!Res)
10000       continue;
10001     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10002         !HasRequiresUnifiedSharedMemory) {
10003       CGM.EmitGlobal(VD);
10004     } else {
10005       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10006               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10007                HasRequiresUnifiedSharedMemory)) &&
10008              "Expected link clause or to clause with unified memory.");
10009       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10010     }
10011   }
10012 }
10013 
10014 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10015     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10016   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10017          " Expected target-based directive.");
10018 }
10019 
/// Record the properties requested by an 'omp requires' directive:
/// unified_shared_memory sets a flag used throughout target codegen, and
/// atomic_default_mem_order selects the default LLVM atomic ordering
/// returned by getDefaultMemoryOrdering().
void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
    } else if (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
      // Translate the clause's memory-order kind to the LLVM equivalent.
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        // Invalid/unknown kind: leave the current ordering untouched.
        break;
      }
    }
  }
}
10042 
/// Return the atomic ordering selected by an 'atomic_default_mem_order'
/// requires clause (see processRequiresDirective).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10046 
10047 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10048                                                        LangAS &AS) {
10049   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10050     return false;
10051   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10052   switch(A->getAllocatorType()) {
10053   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10054   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10055   // Not supported, fallback to the default mem space.
10056   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10057   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10058   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10059   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10060   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10061   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10062   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10063     AS = LangAS::Default;
10064     return true;
10065   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10066     llvm_unreachable("Expected predefined allocator for the variables with the "
10067                      "static storage.");
10068   }
10069   return false;
10070 }
10071 
/// True once a 'requires unified_shared_memory' directive has been processed
/// (see processRequiresDirective).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
10075 
/// Temporarily clear the ShouldMarkAsGlobal flag (consulted by
/// markAsGlobalTarget) while compiling for a device; the destructor restores
/// the saved value.
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // Save the current flag so the destructor can restore it.
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}
10084 
/// Restore the ShouldMarkAsGlobal flag saved by the constructor (device
/// compilation only).
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}
10089 
/// Track whether the function \p GD still needs to be emitted for the
/// device. Returns true when no further emission is required.
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  // Host compilation, or marking disabled (see DisableAutoDeclareTargetRAII):
  // nothing to track.
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target and was already
  // emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      // If a definition already exists in the module, the body was emitted.
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // Non-declare-target: record it; report "already done" only if it was
  // previously recorded.
  return !AlreadyEmittedTargetDecls.insert(D).second;
}
10109 
/// Create the global initializer function that registers the 'requires'
/// flags with the offload runtime via __tgt_register_requires.
/// \return the created function, or nullptr when no registration is needed
/// (no target triples, simd-only mode, device compilation, or no offload
/// entries/target regions in this TU).
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
10151 
10152 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10153                                     const OMPExecutableDirective &D,
10154                                     SourceLocation Loc,
10155                                     llvm::Function *OutlinedFn,
10156                                     ArrayRef<llvm::Value *> CapturedVars) {
10157   if (!CGF.HaveInsertPoint())
10158     return;
10159 
10160   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10161   CodeGenFunction::RunCleanupsScope Scope(CGF);
10162 
10163   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10164   llvm::Value *Args[] = {
10165       RTLoc,
10166       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10167       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10168   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10169   RealArgs.append(std::begin(Args), std::end(Args));
10170   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10171 
10172   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10173       CGM.getModule(), OMPRTL___kmpc_fork_teams);
10174   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10175 }
10176 
/// Emit a call that pushes the values of the 'num_teams' and 'thread_limit'
/// clauses to the runtime. A null clause expression is lowered to 0.
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}
10205 
/// Emit the runtime calls bracketing a 'target data' region:
/// __tgt_target_data_begin_mapper before the body and
/// __tgt_target_data_end_mapper after it, honoring optional 'if' and
/// 'device' clauses. The region body may be emitted twice (with and without
/// device-pointer privatization) when use_device_ptr captures are present.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MappersArrayArg, Info, /*ForEndCall=*/false);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,      PointerNum,       BasePointersArrayArg, PointersArrayArg,
        SizesArrayArg, MapTypesArrayArg, MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MappersArrayArg, Info, /*ForEndCall=*/true);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,      PointerNum,       BasePointersArrayArg, PointersArrayArg,
        SizesArrayArg, MapTypesArrayArg, MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
10337 
/// Emit a standalone 'target enter data', 'target exit data' or
/// 'target update' directive as a single runtime mapper call, honoring
/// optional 'if', 'device', 'nowait' and 'depend' clauses.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // InputInfo/MapTypesArray are captured by reference: TargetThenGen fills
  // them in before ThenGen consumes them.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // All other directive kinds must never reach this function (checked by
    // the assert at the top); they are spelled out so that adding a new
    // directive kind triggers a -Wswitch diagnostic here.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info.MappersArray, Info);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    // With a 'depend' clause the call is wrapped in a task; otherwise it is
    // emitted inline.
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
10502 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    // Classification used when mangling the vector variant name.
    ParamKindTy Kind = Vector;
    // Linear stride or argument value; interpretation depends on Kind.
    llvm::APSInt StrideOrArg;
    // Alignment for the parameter; zero means "no alignment specified".
    llvm::APSInt Alignment;
  };
} // namespace
10513 
10514 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10515                                 ArrayRef<ParamAttrTy> ParamAttrs) {
10516   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10517   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10518   // of that clause. The VLEN value must be power of 2.
10519   // In other case the notion of the function`s "characteristic data type" (CDT)
10520   // is used to compute the vector length.
10521   // CDT is defined in the following order:
10522   //   a) For non-void function, the CDT is the return type.
10523   //   b) If the function has any non-uniform, non-linear parameters, then the
10524   //   CDT is the type of the first such parameter.
10525   //   c) If the CDT determined by a) or b) above is struct, union, or class
10526   //   type which is pass-by-value (except for the type that maps to the
10527   //   built-in complex data type), the characteristic data type is int.
10528   //   d) If none of the above three cases is applicable, the CDT is int.
10529   // The VLEN is then determined based on the CDT and the size of vector
10530   // register of that ISA for which current vector version is generated. The
10531   // VLEN is computed using the formula below:
10532   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10533   // where vector register size specified in section 3.2.1 Registers and the
10534   // Stack Frame of original AMD64 ABI document.
10535   QualType RetType = FD->getReturnType();
10536   if (RetType.isNull())
10537     return 0;
10538   ASTContext &C = FD->getASTContext();
10539   QualType CDT;
10540   if (!RetType.isNull() && !RetType->isVoidType()) {
10541     CDT = RetType;
10542   } else {
10543     unsigned Offset = 0;
10544     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10545       if (ParamAttrs[Offset].Kind == Vector)
10546         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10547       ++Offset;
10548     }
10549     if (CDT.isNull()) {
10550       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10551         if (ParamAttrs[I + Offset].Kind == Vector) {
10552           CDT = FD->getParamDecl(I)->getType();
10553           break;
10554         }
10555       }
10556     }
10557   }
10558   if (CDT.isNull())
10559     CDT = C.IntTy;
10560   CDT = CDT->getCanonicalTypeUnqualified();
10561   if (CDT->isRecordType() || CDT->isUnionType())
10562     CDT = C.IntTy;
10563   return C.getTypeSize(CDT);
10564 }
10565 
10566 static void
10567 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10568                            const llvm::APSInt &VLENVal,
10569                            ArrayRef<ParamAttrTy> ParamAttrs,
10570                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
10571   struct ISADataTy {
10572     char ISA;
10573     unsigned VecRegSize;
10574   };
10575   ISADataTy ISAData[] = {
10576       {
10577           'b', 128
10578       }, // SSE
10579       {
10580           'c', 256
10581       }, // AVX
10582       {
10583           'd', 256
10584       }, // AVX2
10585       {
10586           'e', 512
10587       }, // AVX512
10588   };
10589   llvm::SmallVector<char, 2> Masked;
10590   switch (State) {
10591   case OMPDeclareSimdDeclAttr::BS_Undefined:
10592     Masked.push_back('N');
10593     Masked.push_back('M');
10594     break;
10595   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10596     Masked.push_back('N');
10597     break;
10598   case OMPDeclareSimdDeclAttr::BS_Inbranch:
10599     Masked.push_back('M');
10600     break;
10601   }
10602   for (char Mask : Masked) {
10603     for (const ISADataTy &Data : ISAData) {
10604       SmallString<256> Buffer;
10605       llvm::raw_svector_ostream Out(Buffer);
10606       Out << "_ZGV" << Data.ISA << Mask;
10607       if (!VLENVal) {
10608         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10609         assert(NumElts && "Non-zero simdlen/cdtsize expected");
10610         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10611       } else {
10612         Out << VLENVal;
10613       }
10614       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
10615         switch (ParamAttr.Kind){
10616         case LinearWithVarStride:
10617           Out << 's' << ParamAttr.StrideOrArg;
10618           break;
10619         case Linear:
10620           Out << 'l';
10621           if (ParamAttr.StrideOrArg != 1)
10622             Out << ParamAttr.StrideOrArg;
10623           break;
10624         case Uniform:
10625           Out << 'u';
10626           break;
10627         case Vector:
10628           Out << 'v';
10629           break;
10630         }
10631         if (!!ParamAttr.Alignment)
10632           Out << 'a' << ParamAttr.Alignment;
10633       }
10634       Out << '_' << Fn->getName();
10635       Fn->addFnAttr(Out.str());
10636     }
10637   }
10638 }
10639 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined
// in the "Vector Function ABI specifications for AArch64", available
// at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10645 
10646 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
10647 ///
10648 /// TODO: Need to implement the behavior for reference marked with a
10649 /// var or no linear modifiers (1.b in the section). For this, we
10650 /// need to extend ParamKindTy to support the linear modifiers.
10651 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10652   QT = QT.getCanonicalType();
10653 
10654   if (QT->isVoidType())
10655     return false;
10656 
10657   if (Kind == ParamKindTy::Uniform)
10658     return false;
10659 
10660   if (Kind == ParamKindTy::Linear)
10661     return false;
10662 
10663   // TODO: Handle linear references with modifiers
10664 
10665   if (Kind == ParamKindTy::LinearWithVarStride)
10666     return false;
10667 
10668   return true;
10669 }
10670 
10671 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10672 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10673   QT = QT.getCanonicalType();
10674   unsigned Size = C.getTypeSize(QT);
10675 
10676   // Only scalars and complex within 16 bytes wide set PVB to true.
10677   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10678     return false;
10679 
10680   if (QT->isFloatingType())
10681     return true;
10682 
10683   if (QT->isIntegerType())
10684     return true;
10685 
10686   if (QT->isPointerType())
10687     return true;
10688 
10689   // TODO: Add support for complex types (section 3.1.2, item 2).
10690 
10691   return false;
10692 }
10693 
10694 /// Computes the lane size (LS) of a return type or of an input parameter,
10695 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10696 /// TODO: Add support for references, section 3.2.1, item 1.
10697 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10698   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10699     QualType PTy = QT.getCanonicalType()->getPointeeType();
10700     if (getAArch64PBV(PTy, C))
10701       return C.getTypeSize(PTy);
10702   }
10703   if (getAArch64PBV(QT, C))
10704     return C.getTypeSize(QT);
10705 
10706   return C.getTypeSize(C.getUIntPtrType());
10707 }
10708 
10709 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10710 // signature of the scalar function, as defined in 3.2.2 of the
10711 // AAVFABI.
10712 static std::tuple<unsigned, unsigned, bool>
10713 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10714   QualType RetType = FD->getReturnType().getCanonicalType();
10715 
10716   ASTContext &C = FD->getASTContext();
10717 
10718   bool OutputBecomesInput = false;
10719 
10720   llvm::SmallVector<unsigned, 8> Sizes;
10721   if (!RetType->isVoidType()) {
10722     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10723     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10724       OutputBecomesInput = true;
10725   }
10726   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10727     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10728     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10729   }
10730 
10731   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10732   // The LS of a function parameter / return value can only be a power
10733   // of 2, starting from 8 bits, up to 128.
10734   assert(std::all_of(Sizes.begin(), Sizes.end(),
10735                      [](unsigned Size) {
10736                        return Size == 8 || Size == 16 || Size == 32 ||
10737                               Size == 64 || Size == 128;
10738                      }) &&
10739          "Invalid size");
10740 
10741   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10742                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
10743                          OutputBecomesInput);
10744 }
10745 
10746 /// Mangle the parameter part of the vector function name according to
10747 /// their OpenMP classification. The mangling function is defined in
10748 /// section 3.5 of the AAVFABI.
10749 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10750   SmallString<256> Buffer;
10751   llvm::raw_svector_ostream Out(Buffer);
10752   for (const auto &ParamAttr : ParamAttrs) {
10753     switch (ParamAttr.Kind) {
10754     case LinearWithVarStride:
10755       Out << "ls" << ParamAttr.StrideOrArg;
10756       break;
10757     case Linear:
10758       Out << 'l';
10759       // Don't print the step value if it is not present or if it is
10760       // equal to 1.
10761       if (ParamAttr.StrideOrArg != 1)
10762         Out << ParamAttr.StrideOrArg;
10763       break;
10764     case Uniform:
10765       Out << 'u';
10766       break;
10767     case Vector:
10768       Out << 'v';
10769       break;
10770     }
10771 
10772     if (!!ParamAttr.Alignment)
10773       Out << 'a' << ParamAttr.Alignment;
10774   }
10775 
10776   return std::string(Out.str());
10777 }
10778 
10779 // Function used to add the attribute. The parameter `VLEN` is
10780 // templated to allow the use of "x" when targeting scalable functions
10781 // for SVE.
10782 template <typename T>
10783 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10784                                  char ISA, StringRef ParSeq,
10785                                  StringRef MangledName, bool OutputBecomesInput,
10786                                  llvm::Function *Fn) {
10787   SmallString<256> Buffer;
10788   llvm::raw_svector_ostream Out(Buffer);
10789   Out << Prefix << ISA << LMask << VLEN;
10790   if (OutputBecomesInput)
10791     Out << "v";
10792   Out << ParSeq << "_" << MangledName;
10793   Fn->addFnAttr(Out.str());
10794 }
10795 
10796 // Helper function to generate the Advanced SIMD names depending on
10797 // the value of the NDS when simdlen is not present.
10798 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10799                                       StringRef Prefix, char ISA,
10800                                       StringRef ParSeq, StringRef MangledName,
10801                                       bool OutputBecomesInput,
10802                                       llvm::Function *Fn) {
10803   switch (NDS) {
10804   case 8:
10805     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10806                          OutputBecomesInput, Fn);
10807     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10808                          OutputBecomesInput, Fn);
10809     break;
10810   case 16:
10811     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10812                          OutputBecomesInput, Fn);
10813     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10814                          OutputBecomesInput, Fn);
10815     break;
10816   case 32:
10817     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10818                          OutputBecomesInput, Fn);
10819     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10820                          OutputBecomesInput, Fn);
10821     break;
10822   case 64:
10823   case 128:
10824     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10825                          OutputBecomesInput, Fn);
10826     break;
10827   default:
10828     llvm_unreachable("Scalar type is too wide.");
10829   }
10830 }
10831 
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
///
/// Emits one "_ZGV..."-mangled function attribute per generated vector
/// variant of \p Fn. \p ISA selects SVE ('s') or Advanced SIMD ('n');
/// \p UserVLEN is the user's `simdlen` value, or 0 when the clause is
/// absent. Invalid user input is diagnosed at \p SLoc and no attribute
/// is emitted.
/// NOTE(review): \p VecRegSize is accepted but not referenced in this
/// body — confirm whether it is intentionally unused.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits: total vector width (lanes * widest lane) in [128, 2048]
  // bits, in steps of 128.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1: scalable length, mangled as "x".
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`. The lane counts are derived from the NDS.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
10940 
10941 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10942                                               llvm::Function *Fn) {
10943   ASTContext &C = CGM.getContext();
10944   FD = FD->getMostRecentDecl();
10945   // Map params to their positions in function decl.
10946   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10947   if (isa<CXXMethodDecl>(FD))
10948     ParamPositions.try_emplace(FD, 0);
10949   unsigned ParamPos = ParamPositions.size();
10950   for (const ParmVarDecl *P : FD->parameters()) {
10951     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10952     ++ParamPos;
10953   }
10954   while (FD) {
10955     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10956       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10957       // Mark uniform parameters.
10958       for (const Expr *E : Attr->uniforms()) {
10959         E = E->IgnoreParenImpCasts();
10960         unsigned Pos;
10961         if (isa<CXXThisExpr>(E)) {
10962           Pos = ParamPositions[FD];
10963         } else {
10964           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10965                                 ->getCanonicalDecl();
10966           Pos = ParamPositions[PVD];
10967         }
10968         ParamAttrs[Pos].Kind = Uniform;
10969       }
10970       // Get alignment info.
10971       auto NI = Attr->alignments_begin();
10972       for (const Expr *E : Attr->aligneds()) {
10973         E = E->IgnoreParenImpCasts();
10974         unsigned Pos;
10975         QualType ParmTy;
10976         if (isa<CXXThisExpr>(E)) {
10977           Pos = ParamPositions[FD];
10978           ParmTy = E->getType();
10979         } else {
10980           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10981                                 ->getCanonicalDecl();
10982           Pos = ParamPositions[PVD];
10983           ParmTy = PVD->getType();
10984         }
10985         ParamAttrs[Pos].Alignment =
10986             (*NI)
10987                 ? (*NI)->EvaluateKnownConstInt(C)
10988                 : llvm::APSInt::getUnsigned(
10989                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10990                           .getQuantity());
10991         ++NI;
10992       }
10993       // Mark linear parameters.
10994       auto SI = Attr->steps_begin();
10995       auto MI = Attr->modifiers_begin();
10996       for (const Expr *E : Attr->linears()) {
10997         E = E->IgnoreParenImpCasts();
10998         unsigned Pos;
10999         // Rescaling factor needed to compute the linear parameter
11000         // value in the mangled name.
11001         unsigned PtrRescalingFactor = 1;
11002         if (isa<CXXThisExpr>(E)) {
11003           Pos = ParamPositions[FD];
11004         } else {
11005           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11006                                 ->getCanonicalDecl();
11007           Pos = ParamPositions[PVD];
11008           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11009             PtrRescalingFactor = CGM.getContext()
11010                                      .getTypeSizeInChars(P->getPointeeType())
11011                                      .getQuantity();
11012         }
11013         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11014         ParamAttr.Kind = Linear;
11015         // Assuming a stride of 1, for `linear` without modifiers.
11016         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11017         if (*SI) {
11018           Expr::EvalResult Result;
11019           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11020             if (const auto *DRE =
11021                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11022               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
11023                 ParamAttr.Kind = LinearWithVarStride;
11024                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
11025                     ParamPositions[StridePVD->getCanonicalDecl()]);
11026               }
11027             }
11028           } else {
11029             ParamAttr.StrideOrArg = Result.Val.getInt();
11030           }
11031         }
11032         // If we are using a linear clause on a pointer, we need to
11033         // rescale the value of linear_step with the byte size of the
11034         // pointee type.
11035         if (Linear == ParamAttr.Kind)
11036           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11037         ++SI;
11038         ++MI;
11039       }
11040       llvm::APSInt VLENVal;
11041       SourceLocation ExprLoc;
11042       const Expr *VLENExpr = Attr->getSimdlen();
11043       if (VLENExpr) {
11044         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11045         ExprLoc = VLENExpr->getExprLoc();
11046       }
11047       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11048       if (CGM.getTriple().isX86()) {
11049         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11050       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11051         unsigned VLEN = VLENVal.getExtValue();
11052         StringRef MangledName = Fn->getName();
11053         if (CGM.getTarget().hasFeature("sve"))
11054           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11055                                          MangledName, 's', 128, Fn, ExprLoc);
11056         if (CGM.getTarget().hasFeature("neon"))
11057           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11058                                          MangledName, 'n', 128, Fn, ExprLoc);
11059       }
11060     }
11061     FD = FD->getPreviousDecl();
11062   }
11063 }
11064 
11065 namespace {
11066 /// Cleanup action for doacross support.
11067 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11068 public:
11069   static const int DoacrossFinArgs = 2;
11070 
11071 private:
11072   llvm::FunctionCallee RTLFn;
11073   llvm::Value *Args[DoacrossFinArgs];
11074 
11075 public:
11076   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11077                     ArrayRef<llvm::Value *> CallArgs)
11078       : RTLFn(RTLFn) {
11079     assert(CallArgs.size() == DoacrossFinArgs);
11080     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11081   }
11082   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11083     if (!CGF.HaveInsertPoint())
11084       return;
11085     CGF.EmitRuntimeCall(RTLFn, Args);
11086   }
11087 };
11088 } // namespace
11089 
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // Lazily build (and cache in KmpDimTy) the kmp_dim record type used by
  // the doacross runtime; reuse it on subsequent calls.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  // One kmp_dim entry per collapsed loop dimension.
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // Zero-initialize the whole array: 'lo' stays 0 for every dimension.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    // Widen/convert the iteration count expression to kmp_int64.
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Schedule __kmpc_doacross_fini at region exit (normal and EH paths).
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
11160 
11161 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11162                                           const OMPDependClause *C) {
11163   QualType Int64Ty =
11164       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11165   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11166   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11167       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11168   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11169   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11170     const Expr *CounterVal = C->getLoopData(I);
11171     assert(CounterVal);
11172     llvm::Value *CntVal = CGF.EmitScalarConversion(
11173         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11174         CounterVal->getExprLoc());
11175     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11176                           /*Volatile=*/false, Int64Ty);
11177   }
11178   llvm::Value *Args[] = {
11179       emitUpdateLocation(CGF, C->getBeginLoc()),
11180       getThreadID(CGF, C->getBeginLoc()),
11181       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11182   llvm::FunctionCallee RTLFn;
11183   if (C->getDependencyKind() == OMPC_DEPEND_source) {
11184     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11185                                                   OMPRTL___kmpc_doacross_post);
11186   } else {
11187     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11188     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11189                                                   OMPRTL___kmpc_doacross_wait);
11190   }
11191   CGF.EmitRuntimeCall(RTLFn, Args);
11192 }
11193 
11194 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11195                                llvm::FunctionCallee Callee,
11196                                ArrayRef<llvm::Value *> Args) const {
11197   assert(Loc.isValid() && "Outlined function call location must be valid.");
11198   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11199 
11200   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11201     if (Fn->doesNotThrow()) {
11202       CGF.EmitNounwindRuntimeCall(Fn, Args);
11203       return;
11204     }
11205   }
11206   CGF.EmitRuntimeCall(Callee, Args);
11207 }
11208 
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  // Forward to the generic call-emission helper above.
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11214 
11215 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11216   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11217     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11218       HasEmittedDeclareTargetRegion = true;
11219 }
11220 
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  // Default implementation: the native parameter's own local slot is used
  // directly; TargetParam is intentionally ignored here (device runtimes
  // may remap it — confirm against overriding implementations).
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11226 
11227 namespace {
11228 /// Cleanup action for allocate support.
11229 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11230 public:
11231   static const int CleanupArgs = 3;
11232 
11233 private:
11234   llvm::FunctionCallee RTLFn;
11235   llvm::Value *Args[CleanupArgs];
11236 
11237 public:
11238   OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11239                        ArrayRef<llvm::Value *> CallArgs)
11240       : RTLFn(RTLFn) {
11241     assert(CallArgs.size() == CleanupArgs &&
11242            "Size of arguments does not match.");
11243     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11244   }
11245   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11246     if (!CGF.HaveInsertPoint())
11247       return;
11248     CGF.EmitRuntimeCall(RTLFn, Args);
11249   }
11250 };
11251 } // namespace
11252 
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  // Variables annotated with 'omp allocate' get runtime-managed storage via
  // __kmpc_alloc/__kmpc_free instead of a plain alloca.
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    // Use the default allocation.
    if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
         AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
        !AA->getAllocator())
      return Address::invalid();
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // VLA: size is only known at runtime.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      // Constant-size type: round the size up to the alignment at
      // compile time.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is a enum
    // (integer). Convert to pointer type, if required.
    if (Allocator->getType()->isIntegerTy())
      Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
    else if (Allocator->getType()->isPointerTy())
      Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Allocator, CGM.VoidPtrTy);
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    // void *<name>.void.addr = __kmpc_alloc(gtid, size, allocator);
    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    // Schedule the matching __kmpc_free at scope exit (normal and EH).
    llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
                                                                Allocator};
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);

    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                                  llvm::makeArrayRef(FiniArgs));
    // Cast the raw allocation to a pointer to the variable's type.
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr,
        CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
        getName({CVD->getName(), ".addr"}));
    return Address(Addr, Align);
  }
  // Otherwise, check whether this is a local of an untied task whose
  // address was recorded on the untied-locals stack.
  if (UntiedLocalVarsStack.empty())
    return Address::invalid();
  const UntiedLocalVarsAddressesMap &UntiedData = UntiedLocalVarsStack.back();
  auto It = UntiedData.find(VD);
  if (It == UntiedData.end())
    return Address::invalid();

  return It->second;
}
11317 
11318 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11319     CodeGenModule &CGM, const OMPLoopDirective &S)
11320     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11321   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11322   if (!NeedToPush)
11323     return;
11324   NontemporalDeclsSet &DS =
11325       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11326   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11327     for (const Stmt *Ref : C->private_refs()) {
11328       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11329       const ValueDecl *VD;
11330       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11331         VD = DRE->getDecl();
11332       } else {
11333         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11334         assert((ME->isImplicitCXXThis() ||
11335                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11336                "Expected member of current class.");
11337         VD = ME->getMemberDecl();
11338       }
11339       DS.insert(VD);
11340     }
11341   }
11342 }
11343 
11344 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11345   if (!NeedToPush)
11346     return;
11347   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11348 }
11349 
11350 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
11351     CodeGenModule &CGM,
11352     const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, Address> &LocalVars)
11353     : CGM(CGM), NeedToPush(!LocalVars.empty()) {
11354   if (!NeedToPush)
11355     return;
11356   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
11357 }
11358 
11359 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
11360   if (!NeedToPush)
11361     return;
11362   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
11363 }
11364 
11365 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11366   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11367 
11368   return llvm::any_of(
11369       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11370       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
11371 }
11372 
11373 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11374     const OMPExecutableDirective &S,
11375     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11376     const {
11377   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11378   // Vars in target/task regions must be excluded completely.
11379   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11380       isOpenMPTaskingDirective(S.getDirectiveKind())) {
11381     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11382     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11383     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11384     for (const CapturedStmt::Capture &Cap : CS->captures()) {
11385       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11386         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11387     }
11388   }
11389   // Exclude vars in private clauses.
11390   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11391     for (const Expr *Ref : C->varlists()) {
11392       if (!Ref->getType()->isScalarType())
11393         continue;
11394       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11395       if (!DRE)
11396         continue;
11397       NeedToCheckForLPCs.insert(DRE->getDecl());
11398     }
11399   }
11400   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11401     for (const Expr *Ref : C->varlists()) {
11402       if (!Ref->getType()->isScalarType())
11403         continue;
11404       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11405       if (!DRE)
11406         continue;
11407       NeedToCheckForLPCs.insert(DRE->getDecl());
11408     }
11409   }
11410   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11411     for (const Expr *Ref : C->varlists()) {
11412       if (!Ref->getType()->isScalarType())
11413         continue;
11414       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11415       if (!DRE)
11416         continue;
11417       NeedToCheckForLPCs.insert(DRE->getDecl());
11418     }
11419   }
11420   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11421     for (const Expr *Ref : C->varlists()) {
11422       if (!Ref->getType()->isScalarType())
11423         continue;
11424       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11425       if (!DRE)
11426         continue;
11427       NeedToCheckForLPCs.insert(DRE->getDecl());
11428     }
11429   }
11430   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11431     for (const Expr *Ref : C->varlists()) {
11432       if (!Ref->getType()->isScalarType())
11433         continue;
11434       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11435       if (!DRE)
11436         continue;
11437       NeedToCheckForLPCs.insert(DRE->getDecl());
11438     }
11439   }
11440   for (const Decl *VD : NeedToCheckForLPCs) {
11441     for (const LastprivateConditionalData &Data :
11442          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11443       if (Data.DeclToUniqueName.count(VD) > 0) {
11444         if (!Data.Disabled)
11445           NeedToAddForLPCsAsDisabled.insert(VD);
11446         break;
11447       }
11448     }
11449   }
11450 }
11451 
// Pushes a lastprivate-conditional region onto the stack when the directive
// carries at least one 'lastprivate(conditional: ...)' clause and OpenMP is
// at least 5.0; otherwise this RAII is a no-op (Action == DoNotPush).
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    // Map each conditional lastprivate variable to a unique name ("pl_cond"
    // prefix) later used for the tracking globals.
    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  // Remember the loop iteration variable and the function owning the region.
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
11483 
11484 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11485     CodeGenFunction &CGF, const OMPExecutableDirective &S)
11486     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11487   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11488   if (CGM.getLangOpts().OpenMP < 50)
11489     return;
11490   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11491   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11492   if (!NeedToAddForLPCsAsDisabled.empty()) {
11493     Action = ActionToDo::DisableLastprivateConditional;
11494     LastprivateConditionalData &Data =
11495         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11496     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11497       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
11498     Data.Fn = CGF.CurFn;
11499     Data.Disabled = true;
11500   }
11501 }
11502 
// Factory wrapper: the (CGF, S) constructor only ever pushes a "disabled"
// record (or nothing), so the returned RAII suppresses lastprivate
// conditional analysis for the duration of S's codegen.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
11508 
11509 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11510   if (CGM.getLangOpts().OpenMP < 50)
11511     return;
11512   if (Action == ActionToDo::DisableLastprivateConditional) {
11513     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11514            "Expected list of disabled private vars.");
11515     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11516   }
11517   if (Action == ActionToDo::PushAsLastprivateConditional) {
11518     assert(
11519         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11520         "Expected list of lastprivate conditional vars.");
11521     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11522   }
11523 }
11524 
// Creates (or reuses) the per-function helper aggregate
// { <value of VD's type>, char Fired } for a lastprivate conditional variable
// VD, zero-initializes the Fired flag, and returns the address of the value
// field to serve as VD's private copy.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  // Per-function cache of (type, value field, Fired field, base lvalue).
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use of VD in this function: build the implicit record type and a
    // local temporary for it. NOTE(review): "lasprivate" is a long-standing
    // typo in the record name; renaming would churn emitted IR type names.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Reset the Fired flag: no update of VD has happened yet in this region.
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
11559 
11560 namespace {
11561 /// Checks if the lastprivate conditional variable is referenced in LHS.
11562 class LastprivateConditionalRefChecker final
11563     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11564   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11565   const Expr *FoundE = nullptr;
11566   const Decl *FoundD = nullptr;
11567   StringRef UniqueDeclName;
11568   LValue IVLVal;
11569   llvm::Function *FoundFn = nullptr;
11570   SourceLocation Loc;
11571 
11572 public:
11573   bool VisitDeclRefExpr(const DeclRefExpr *E) {
11574     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11575          llvm::reverse(LPM)) {
11576       auto It = D.DeclToUniqueName.find(E->getDecl());
11577       if (It == D.DeclToUniqueName.end())
11578         continue;
11579       if (D.Disabled)
11580         return false;
11581       FoundE = E;
11582       FoundD = E->getDecl()->getCanonicalDecl();
11583       UniqueDeclName = It->second;
11584       IVLVal = D.IVLVal;
11585       FoundFn = D.Fn;
11586       break;
11587     }
11588     return FoundE == E;
11589   }
11590   bool VisitMemberExpr(const MemberExpr *E) {
11591     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11592       return false;
11593     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11594          llvm::reverse(LPM)) {
11595       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11596       if (It == D.DeclToUniqueName.end())
11597         continue;
11598       if (D.Disabled)
11599         return false;
11600       FoundE = E;
11601       FoundD = E->getMemberDecl()->getCanonicalDecl();
11602       UniqueDeclName = It->second;
11603       IVLVal = D.IVLVal;
11604       FoundFn = D.Fn;
11605       break;
11606     }
11607     return FoundE == E;
11608   }
11609   bool VisitStmt(const Stmt *S) {
11610     for (const Stmt *Child : S->children()) {
11611       if (!Child)
11612         continue;
11613       if (const auto *E = dyn_cast<Expr>(Child))
11614         if (!E->isGLValue())
11615           continue;
11616       if (Visit(Child))
11617         return true;
11618     }
11619     return false;
11620   }
11621   explicit LastprivateConditionalRefChecker(
11622       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11623       : LPM(LPM) {}
11624   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11625   getFoundData() const {
11626     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11627   }
11628 };
11629 } // namespace
11630 
// Emits, guarded by a critical region named UniqueDeclName (or unguarded in
// simd-only mode, where no parallel regions exist):
//   if (last_iv <= iv) { last_iv = iv; last_a = priv_a; }
// where last_iv/last_a are internal globals uniquified by UniqueDeclName that
// track the latest iteration in which the lastprivate conditional variable
// (LVal) was updated, and its value in that iteration.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    // Scalar and complex values are supported; aggregates cannot be
    // lastprivate conditional.
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
11717 
// Called when LHS is about to be written. If LHS references a tracked
// lastprivate conditional variable, either updates the tracking globals
// directly (when the owning region lives in the current function) or
// atomically raises the variable's Fired flag (when the store happens in an
// inner region outlined into a different function).
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    // Reinterpret the private copy's address as the helper aggregate created
    // by emitLastprivateConditionalInit to reach its Fired flag.
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    // The flag may be set concurrently by several threads; an unordered
    // volatile atomic store is emitted.
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}
11760 
11761 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11762     CodeGenFunction &CGF, const OMPExecutableDirective &D,
11763     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11764   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11765     return;
11766   auto Range = llvm::reverse(LastprivateConditionalStack);
11767   auto It = llvm::find_if(
11768       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
11769   if (It == Range.end() || It->Fn != CGF.CurFn)
11770     return;
11771   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
11772   assert(LPCI != LastprivateConditionalToTypes.end() &&
11773          "Lastprivates must be registered already.");
11774   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11775   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11776   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11777   for (const auto &Pair : It->DeclToUniqueName) {
11778     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
11779     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
11780       continue;
11781     auto I = LPCI->getSecond().find(Pair.first);
11782     assert(I != LPCI->getSecond().end() &&
11783            "Lastprivate must be rehistered already.");
11784     // bool Cmp = priv_a.Fired != 0;
11785     LValue BaseLVal = std::get<3>(I->getSecond());
11786     LValue FiredLVal =
11787         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
11788     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
11789     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
11790     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
11791     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
11792     // if (Cmp) {
11793     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
11794     CGF.EmitBlock(ThenBB);
11795     Address Addr = CGF.GetAddrOfLocalVar(VD);
11796     LValue LVal;
11797     if (VD->getType()->isReferenceType())
11798       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11799                                            AlignmentSource::Decl);
11800     else
11801       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11802                                 AlignmentSource::Decl);
11803     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
11804                                      D.getBeginLoc());
11805     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
11806     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
11807     // }
11808   }
11809 }
11810 
// Copies the globally tracked "last value" of lastprivate conditional
// variable VD back into the user's original variable (PrivLVal) at the end of
// the region. If the tracking global was never created, the variable was
// never updated inside the region and nothing is copied.
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
11829 
// Unreachable: outlining for 'parallel' regions is not supported in SIMD-only
// mode.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11835 
// Unreachable: outlining for 'teams' regions is not supported in SIMD-only
// mode.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11841 
// Unreachable: outlining for task regions is not supported in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11849 
// Unreachable: 'parallel' calls are not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11857 
// Unreachable: 'critical' regions are not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11864 
// Unreachable: 'master' regions are not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11870 
// Unreachable: 'taskyield' is not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11875 
// Unreachable: 'taskgroup' regions are not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11881 
// Unreachable: 'single' regions are not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11889 
// Unreachable: 'ordered' regions are not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11896 
// Unreachable: barriers are not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11904 
// Unreachable: dynamic worksharing-loop initialization is not supported in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11911 
// Unreachable: static worksharing-loop initialization is not supported in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11917 
// Unreachable: 'distribute' static initialization is not supported in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11923 
// Unreachable: ordered-iteration bookkeeping is not supported in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11930 
// Unreachable: static worksharing-loop finalization is not supported in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11936 
// Unreachable: dynamic-loop "next chunk" queries are not supported in
// SIMD-only mode.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11944 
// Unreachable: the 'num_threads' clause is not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11950 
// Unreachable: the 'proc_bind' clause is not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11956 
// Unreachable: threadprivate variables are not supported in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11963 
// Unreachable: threadprivate variable definitions are not supported in
// SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11969 
// Unreachable: artificial threadprivate storage is not supported in SIMD-only
// mode.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11974 
// Unreachable: 'flush' is not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11981 
// Unreachable: task dispatch is not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11990 
// Unreachable: 'taskloop' dispatch is not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11997 
// In SIMD-only mode only simple reductions are expected (asserted below);
// those are handled by delegating to the base implementation.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
12006 
// SIMD-only stub: task reductions require the full runtime; must never be
// reached in SIMD-only mode.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12012 
// SIMD-only stub: task-reduction finalization requires the full runtime; must
// never be reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12018 
// SIMD-only stub: task-reduction fixups require the full runtime; must never
// be reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12025 
// SIMD-only stub: looking up a task-reduction item requires the full runtime;
// must never be reached in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12032 
// SIMD-only stub: 'taskwait' requires the full tasking runtime; must never be
// reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12037 
// SIMD-only stub: 'cancellation point' requires the full runtime; must never
// be reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12043 
// SIMD-only stub: 'cancel' requires the full runtime; must never be reached
// in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12049 
// SIMD-only stub: target-region outlining is an offloading feature; must never
// be reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12056 
// SIMD-only stub: launching a target region is an offloading feature; must
// never be reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12066 
// SIMD-only stub: device-side emission of target functions must never be
// requested in SIMD-only mode.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12070 
// SIMD-only stub: device-side emission of target global variables must never
// be requested in SIMD-only mode.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12074 
// Unlike the other target hooks, this one is callable in SIMD-only mode: it
// always returns false, i.e. this runtime performs no target-specific
// handling of the global and leaves emission to the regular codegen path.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
12078 
// SIMD-only stub: 'teams' constructs require the full runtime; must never be
// reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12086 
// SIMD-only stub: 'num_teams'/'thread_limit' clauses require the full runtime;
// must never be reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12093 
// SIMD-only stub: 'target data' mapping requires the offloading runtime; must
// never be reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12099 
// SIMD-only stub: standalone target-data directives (e.g. enter/exit data,
// update) require the offloading runtime; must never be reached in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12105 
// SIMD-only stub: doacross loop initialization requires the full runtime; must
// never be reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12111 
// SIMD-only stub: doacross 'ordered' dependences require the full runtime;
// must never be reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12116 
// SIMD-only stub: parameter translation for outlined functions must never be
// requested in SIMD-only mode.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12122 
// SIMD-only stub: mapping a native parameter to its translated address must
// never be requested in SIMD-only mode.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12129