1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Basic/BitmaskEnum.h"
24 #include "clang/Basic/FileManager.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/CodeGen/ConstantInitBuilder.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/SetOperations.h"
30 #include "llvm/ADT/StringExtras.h"
31 #include "llvm/Bitcode/BitcodeReader.h"
32 #include "llvm/IR/Constants.h"
33 #include "llvm/IR/DerivedTypes.h"
34 #include "llvm/IR/GlobalValue.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/Support/AtomicOrdering.h"
37 #include "llvm/Support/Format.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include <cassert>
40 #include <numeric>
41 
42 using namespace clang;
43 using namespace CodeGen;
44 using namespace llvm::omp;
45 
46 namespace {
/// Base class for handling code generation inside OpenMP regions.
///
/// Every instance is a CGCapturedStmtInfo of kind CR_OpenMP that additionally
/// records the region kind, the code generation callback, the directive kind
/// and the 'has cancel' flag supplied at construction.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Creates region info bound to the captured statement \p CS.
  /// \param CS Captured statement the region is generated for.
  /// \param RegionKind Kind of the region, returned by getRegionKind().
  /// \param CodeGen Code generation callback stored for the region.
  /// \param Kind Directive kind, returned by getDirectiveKind().
  /// \param HasCancel Flag returned by hasCancel().
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Creates region info that is not bound to a captured statement (this is
  /// the constructor used by inlined regions). The parameters have the same
  /// meaning as in the constructor above.
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Hook for emitting a switching point of an untied task. Does nothing by
  /// default; region kinds that support untied tasks override it.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Returns the region kind passed to the constructor.
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// Returns the directive kind passed to the constructor.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// Returns the 'has cancel' flag passed to the constructor.
  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI support: every captured statement info of kind CR_OpenMP
  /// is treated as a CGOpenMPRegionInfo.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Kind of this region; used by the derived classes' classof().
  CGOpenMPRegionKind RegionKind;
  /// Code generation callback of the region.
  RegionCodeGenTy CodeGen;
  /// Kind of the directive the region was created for.
  OpenMPDirectiveKind Kind;
  /// Value reported by hasCancel().
  bool HasCancel;
};
108 
109 /// API for captured statement code generation in OpenMP constructs.
110 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
111 public:
112   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
113                              const RegionCodeGenTy &CodeGen,
114                              OpenMPDirectiveKind Kind, bool HasCancel,
115                              StringRef HelperName)
116       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
117                            HasCancel),
118         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
119     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
120   }
121 
122   /// Get a variable or parameter for storing global thread id
123   /// inside OpenMP construct.
124   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
125 
126   /// Get the name of the capture helper.
127   StringRef getHelperName() const override { return HelperName; }
128 
129   static bool classof(const CGCapturedStmtInfo *Info) {
130     return CGOpenMPRegionInfo::classof(Info) &&
131            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
132                ParallelOutlinedRegion;
133   }
134 
135 private:
136   /// A variable or parameter storing global thread id for OpenMP
137   /// constructs.
138   const VarDecl *ThreadIDVar;
139   StringRef HelperName;
140 };
141 
142 /// API for captured statement code generation in OpenMP constructs.
143 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
144 public:
145   class UntiedTaskActionTy final : public PrePostActionTy {
146     bool Untied;
147     const VarDecl *PartIDVar;
148     const RegionCodeGenTy UntiedCodeGen;
149     llvm::SwitchInst *UntiedSwitch = nullptr;
150 
151   public:
152     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
153                        const RegionCodeGenTy &UntiedCodeGen)
154         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
155     void Enter(CodeGenFunction &CGF) override {
156       if (Untied) {
157         // Emit task switching point.
158         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
159             CGF.GetAddrOfLocalVar(PartIDVar),
160             PartIDVar->getType()->castAs<PointerType>());
161         llvm::Value *Res =
162             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
163         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
164         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
165         CGF.EmitBlock(DoneBB);
166         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
167         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
168         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
169                               CGF.Builder.GetInsertBlock());
170         emitUntiedSwitch(CGF);
171       }
172     }
173     void emitUntiedSwitch(CodeGenFunction &CGF) const {
174       if (Untied) {
175         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
176             CGF.GetAddrOfLocalVar(PartIDVar),
177             PartIDVar->getType()->castAs<PointerType>());
178         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
179                               PartIdLVal);
180         UntiedCodeGen(CGF);
181         CodeGenFunction::JumpDest CurPoint =
182             CGF.getJumpDestInCurrentScope(".untied.next.");
183         CGF.EmitBranch(CGF.ReturnBlock.getBlock());
184         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
185         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
186                               CGF.Builder.GetInsertBlock());
187         CGF.EmitBranchThroughCleanup(CurPoint);
188         CGF.EmitBlock(CurPoint.getBlock());
189       }
190     }
191     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
192   };
193   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
194                                  const VarDecl *ThreadIDVar,
195                                  const RegionCodeGenTy &CodeGen,
196                                  OpenMPDirectiveKind Kind, bool HasCancel,
197                                  const UntiedTaskActionTy &Action)
198       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
199         ThreadIDVar(ThreadIDVar), Action(Action) {
200     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
201   }
202 
203   /// Get a variable or parameter for storing global thread id
204   /// inside OpenMP construct.
205   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
206 
207   /// Get an LValue for the current ThreadID variable.
208   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
209 
210   /// Get the name of the capture helper.
211   StringRef getHelperName() const override { return ".omp_outlined."; }
212 
213   void emitUntiedSwitch(CodeGenFunction &CGF) override {
214     Action.emitUntiedSwitch(CGF);
215   }
216 
217   static bool classof(const CGCapturedStmtInfo *Info) {
218     return CGOpenMPRegionInfo::classof(Info) &&
219            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
220                TaskOutlinedRegion;
221   }
222 
223 private:
224   /// A variable or parameter storing global thread id for OpenMP
225   /// constructs.
226   const VarDecl *ThreadIDVar;
227   /// Action for emitting code for untied tasks.
228   const UntiedTaskActionTy &Action;
229 };
230 
231 /// API for inlined captured statement code generation in OpenMP
232 /// constructs.
233 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
234 public:
235   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
236                             const RegionCodeGenTy &CodeGen,
237                             OpenMPDirectiveKind Kind, bool HasCancel)
238       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
239         OldCSI(OldCSI),
240         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
241 
242   // Retrieve the value of the context parameter.
243   llvm::Value *getContextValue() const override {
244     if (OuterRegionInfo)
245       return OuterRegionInfo->getContextValue();
246     llvm_unreachable("No context value for inlined OpenMP region");
247   }
248 
249   void setContextValue(llvm::Value *V) override {
250     if (OuterRegionInfo) {
251       OuterRegionInfo->setContextValue(V);
252       return;
253     }
254     llvm_unreachable("No context value for inlined OpenMP region");
255   }
256 
257   /// Lookup the captured field decl for a variable.
258   const FieldDecl *lookup(const VarDecl *VD) const override {
259     if (OuterRegionInfo)
260       return OuterRegionInfo->lookup(VD);
261     // If there is no outer outlined region,no need to lookup in a list of
262     // captured variables, we can use the original one.
263     return nullptr;
264   }
265 
266   FieldDecl *getThisFieldDecl() const override {
267     if (OuterRegionInfo)
268       return OuterRegionInfo->getThisFieldDecl();
269     return nullptr;
270   }
271 
272   /// Get a variable or parameter for storing global thread id
273   /// inside OpenMP construct.
274   const VarDecl *getThreadIDVariable() const override {
275     if (OuterRegionInfo)
276       return OuterRegionInfo->getThreadIDVariable();
277     return nullptr;
278   }
279 
280   /// Get an LValue for the current ThreadID variable.
281   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
282     if (OuterRegionInfo)
283       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
284     llvm_unreachable("No LValue for inlined OpenMP construct");
285   }
286 
287   /// Get the name of the capture helper.
288   StringRef getHelperName() const override {
289     if (auto *OuterRegionInfo = getOldCSI())
290       return OuterRegionInfo->getHelperName();
291     llvm_unreachable("No helper name for inlined OpenMP construct");
292   }
293 
294   void emitUntiedSwitch(CodeGenFunction &CGF) override {
295     if (OuterRegionInfo)
296       OuterRegionInfo->emitUntiedSwitch(CGF);
297   }
298 
299   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
300 
301   static bool classof(const CGCapturedStmtInfo *Info) {
302     return CGOpenMPRegionInfo::classof(Info) &&
303            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
304   }
305 
306   ~CGOpenMPInlinedRegionInfo() override = default;
307 
308 private:
309   /// CodeGen info about outer OpenMP region.
310   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
311   CGOpenMPRegionInfo *OuterRegionInfo;
312 };
313 
314 /// API for captured statement code generation in OpenMP target
315 /// constructs. For this captures, implicit parameters are used instead of the
316 /// captured fields. The name of the target region has to be unique in a given
317 /// application so it is provided by the client, because only the client has
318 /// the information to generate that.
319 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
320 public:
321   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
322                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
323       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
324                            /*HasCancel=*/false),
325         HelperName(HelperName) {}
326 
327   /// This is unused for target regions because each starts executing
328   /// with a single thread.
329   const VarDecl *getThreadIDVariable() const override { return nullptr; }
330 
331   /// Get the name of the capture helper.
332   StringRef getHelperName() const override { return HelperName; }
333 
334   static bool classof(const CGCapturedStmtInfo *Info) {
335     return CGOpenMPRegionInfo::classof(Info) &&
336            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
337   }
338 
339 private:
340   StringRef HelperName;
341 };
342 
/// Placeholder code generation callback for region infos that only wrap
/// expressions (see CGOpenMPInnerExprInfo). It must never be invoked.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
346 /// API for generation of expressions captured in a innermost OpenMP
347 /// region.
348 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
349 public:
350   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
351       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
352                                   OMPD_unknown,
353                                   /*HasCancel=*/false),
354         PrivScope(CGF) {
355     // Make sure the globals captured in the provided statement are local by
356     // using the privatization logic. We assume the same variable is not
357     // captured more than once.
358     for (const auto &C : CS.captures()) {
359       if (!C.capturesVariable() && !C.capturesVariableByCopy())
360         continue;
361 
362       const VarDecl *VD = C.getCapturedVar();
363       if (VD->isLocalVarDeclOrParm())
364         continue;
365 
366       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
367                       /*RefersToEnclosingVariableOrCapture=*/false,
368                       VD->getType().getNonReferenceType(), VK_LValue,
369                       C.getLocation());
370       PrivScope.addPrivate(
371           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
372     }
373     (void)PrivScope.Privatize();
374   }
375 
376   /// Lookup the captured field decl for a variable.
377   const FieldDecl *lookup(const VarDecl *VD) const override {
378     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
379       return FD;
380     return nullptr;
381   }
382 
383   /// Emit the captured statement body.
384   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
385     llvm_unreachable("No body for expressions");
386   }
387 
388   /// Get a variable or parameter for storing global thread id
389   /// inside OpenMP construct.
390   const VarDecl *getThreadIDVariable() const override {
391     llvm_unreachable("No thread id for expressions");
392   }
393 
394   /// Get the name of the capture helper.
395   StringRef getHelperName() const override {
396     llvm_unreachable("No helper name for expressions");
397   }
398 
399   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
400 
401 private:
402   /// Private scope to capture global variables.
403   CodeGenFunction::OMPPrivateScope PrivScope;
404 };
405 
406 /// RAII for emitting code of OpenMP constructs.
407 class InlinedOpenMPRegionRAII {
408   CodeGenFunction &CGF;
409   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
410   FieldDecl *LambdaThisCaptureField = nullptr;
411   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
412 
413 public:
414   /// Constructs region for combined constructs.
415   /// \param CodeGen Code generation sequence for combined directives. Includes
416   /// a list of functions used for code generation of implicitly inlined
417   /// regions.
418   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
419                           OpenMPDirectiveKind Kind, bool HasCancel)
420       : CGF(CGF) {
421     // Start emission for the construct.
422     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
423         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
424     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
425     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
426     CGF.LambdaThisCaptureField = nullptr;
427     BlockInfo = CGF.BlockInfo;
428     CGF.BlockInfo = nullptr;
429   }
430 
431   ~InlinedOpenMPRegionRAII() {
432     // Restore original CapturedStmtInfo only if we're done with code emission.
433     auto *OldCSI =
434         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
435     delete CGF.CapturedStmtInfo;
436     CGF.CapturedStmtInfo = OldCSI;
437     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
438     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
439     CGF.BlockInfo = BlockInfo;
440   }
441 };
442 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerators are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// The enum is marked as a bitmask enum, with OMP_IDENT_WORK_DISTRIBUTE as
/// its largest value.
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same value as
  /// OMP_IDENT_BARRIER_IMPL).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
471 
namespace {
// Enables the bitwise operators for the enums in this namespace that are
// marked with LLVM_MARK_AS_BITMASK_ENUM.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// Flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// No requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Device ids with a reserved meaning.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
497 
/// Indices of the fields of the ident structure that describes a source
/// location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
/// The enumerators below are listed in the same order as the fields of the
/// structure above.
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
538 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
/// The numeric values mirror that runtime enum, so they must not be changed
/// independently of it.
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// Default schedule: an alias of OMP_sch_static.
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  // NOTE(review): the modifiers are single high bits, presumably OR-ed into
  // one of the schedule values above - confirm against the users of the enum.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
570 
571 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
572 /// region.
573 class CleanupTy final : public EHScopeStack::Cleanup {
574   PrePostActionTy *Action;
575 
576 public:
577   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
578   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
579     if (!CGF.HaveInsertPoint())
580       return;
581     Action->Exit(CGF);
582   }
583 };
584 
585 } // anonymous namespace
586 
587 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
588   CodeGenFunction::RunCleanupsScope Scope(CGF);
589   if (PrePostAction) {
590     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
591     Callback(CodeGen, CGF, *PrePostAction);
592   } else {
593     PrePostActionTy Action;
594     Callback(CodeGen, CGF, Action);
595   }
596 }
597 
598 /// Check if the combiner is a call to UDR combiner and if it is so return the
599 /// UDR decl used for reduction.
600 static const OMPDeclareReductionDecl *
601 getReductionInit(const Expr *ReductionOp) {
602   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
603     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
604       if (const auto *DRE =
605               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
606         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
607           return DRD;
608   return nullptr;
609 }
610 
611 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
612                                              const OMPDeclareReductionDecl *DRD,
613                                              const Expr *InitOp,
614                                              Address Private, Address Original,
615                                              QualType Ty) {
616   if (DRD->getInitializer()) {
617     std::pair<llvm::Function *, llvm::Function *> Reduction =
618         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
619     const auto *CE = cast<CallExpr>(InitOp);
620     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
621     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
622     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
623     const auto *LHSDRE =
624         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
625     const auto *RHSDRE =
626         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
627     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
628     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
629                             [=]() { return Private; });
630     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
631                             [=]() { return Original; });
632     (void)PrivateScope.Privatize();
633     RValue Func = RValue::get(Reduction.second);
634     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
635     CGF.EmitIgnoredExpr(InitOp);
636   } else {
637     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
638     std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
639     auto *GV = new llvm::GlobalVariable(
640         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
641         llvm::GlobalValue::PrivateLinkage, Init, Name);
642     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
643     RValue InitRVal;
644     switch (CGF.getEvaluationKind(Ty)) {
645     case TEK_Scalar:
646       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
647       break;
648     case TEK_Complex:
649       InitRVal =
650           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
651       break;
652     case TEK_Aggregate:
653       InitRVal = RValue::getAggregate(LV.getAddress(CGF));
654       break;
655     }
656     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
657     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
658     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
659                          /*IsInitializer=*/false);
660   }
661 }
662 
/// Emit initialization of arrays of complex types.
/// Emits a loop that initializes the elements of the array at \p DestAddr one
/// by one; the loop is skipped when the array is empty.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, every element is initialized via
/// emitInitWithReductionInitializer(); otherwise \p Init is emitted directly
/// into the element.
/// \param Init Initial expression of array.
/// \param DRD Declare reduction declaration; may be null. Only when it is
/// non-null is \p SrcAddr used and walked in lockstep with the destination.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely if the array has no elements.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes track the current source (only with a DRD) and destination
  // elements; their first incoming values are the array starts.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Cleanups created while initializing one element are run before the
    // loop advances to the next element.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the value is named "...dest.element" although it is the
    // next *source* element; the name is left untouched here because it is
    // part of the emitted IR.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
751 
752 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
753   return CGF.EmitOMPSharedLValue(E);
754 }
755 
756 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
757                                             const Expr *E) {
758   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
759     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
760   return LValue();
761 }
762 
763 void ReductionCodeGen::emitAggregateInitialization(
764     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
765     const OMPDeclareReductionDecl *DRD) {
766   // Emit VarDecl with copy init for arrays.
767   // Get the address of the original variable captured in current
768   // captured region.
769   const auto *PrivateVD =
770       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
771   bool EmitDeclareReductionInit =
772       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
773   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
774                        EmitDeclareReductionInit,
775                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
776                                                 : PrivateVD->getInit(),
777                        DRD, SharedLVal.getAddress(CGF));
778 }
779 
780 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
781                                    ArrayRef<const Expr *> Origs,
782                                    ArrayRef<const Expr *> Privates,
783                                    ArrayRef<const Expr *> ReductionOps) {
784   ClausesData.reserve(Shareds.size());
785   SharedAddresses.reserve(Shareds.size());
786   Sizes.reserve(Shareds.size());
787   BaseDecls.reserve(Shareds.size());
788   const auto *IOrig = Origs.begin();
789   const auto *IPriv = Privates.begin();
790   const auto *IRed = ReductionOps.begin();
791   for (const Expr *Ref : Shareds) {
792     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
793     std::advance(IOrig, 1);
794     std::advance(IPriv, 1);
795     std::advance(IRed, 1);
796   }
797 }
798 
799 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
800   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
801          "Number of generated lvalues must be exactly N.");
802   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
803   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
804   SharedAddresses.emplace_back(First, Second);
805   if (ClausesData[N].Shared == ClausesData[N].Ref) {
806     OrigAddresses.emplace_back(First, Second);
807   } else {
808     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
809     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
810     OrigAddresses.emplace_back(First, Second);
811   }
812 }
813 
814 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
815   const auto *PrivateVD =
816       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
817   QualType PrivateType = PrivateVD->getType();
818   bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
819   if (!PrivateType->isVariablyModifiedType()) {
820     Sizes.emplace_back(
821         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
822         nullptr);
823     return;
824   }
825   llvm::Value *Size;
826   llvm::Value *SizeInChars;
827   auto *ElemType =
828       cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
829           ->getElementType();
830   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
831   if (AsArraySection) {
832     Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
833                                      OrigAddresses[N].first.getPointer(CGF));
834     Size = CGF.Builder.CreateNUWAdd(
835         Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
836     SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
837   } else {
838     SizeInChars =
839         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
840     Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
841   }
842   Sizes.emplace_back(SizeInChars, Size);
843   CodeGenFunction::OpaqueValueMapping OpaqueMap(
844       CGF,
845       cast<OpaqueValueExpr>(
846           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
847       RValue::get(Size));
848   CGF.EmitVariablyModifiedType(PrivateType);
849 }
850 
851 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
852                                          llvm::Value *Size) {
853   const auto *PrivateVD =
854       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
855   QualType PrivateType = PrivateVD->getType();
856   if (!PrivateType->isVariablyModifiedType()) {
857     assert(!Size && !Sizes[N].second &&
858            "Size should be nullptr for non-variably modified reduction "
859            "items.");
860     return;
861   }
862   CodeGenFunction::OpaqueValueMapping OpaqueMap(
863       CGF,
864       cast<OpaqueValueExpr>(
865           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
866       RValue::get(Size));
867   CGF.EmitVariablyModifiedType(PrivateType);
868 }
869 
/// Emits the initialization of the private copy of the N-th reduction item.
///
/// \param PrivateAddr Address of the private copy; it is re-typed to the
///                    memory type of the private variable before use.
/// \param SharedLVal  LValue of the shared item; it is rebuilt with the type,
///                    base info and TBAA info of SharedAddresses[N].first.
/// \param DefaultInit Callback that performs default initialization. Its
///                    boolean result is ignored on the declare-reduction
///                    paths and consulted on the fallback path only.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array-typed private copy: run the default init first only when the
    // declare-reduction has an initializer, then initialize per element.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar/record with a declare-reduction that either has an initializer
    // or whose private variable has none of its own.
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fallback: DefaultInit returned false, so emit the private variable's
    // own non-trivial initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
903 
904 bool ReductionCodeGen::needCleanups(unsigned N) {
905   const auto *PrivateVD =
906       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
907   QualType PrivateType = PrivateVD->getType();
908   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
909   return DTorKind != QualType::DK_none;
910 }
911 
912 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
913                                     Address PrivateAddr) {
914   const auto *PrivateVD =
915       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
916   QualType PrivateType = PrivateVD->getType();
917   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
918   if (needCleanups(N)) {
919     PrivateAddr = CGF.Builder.CreateElementBitCast(
920         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
921     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
922   }
923 }
924 
925 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
926                           LValue BaseLV) {
927   BaseTy = BaseTy.getNonReferenceType();
928   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
929          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
930     if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
931       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
932     } else {
933       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
934       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
935     }
936     BaseTy = BaseTy->getPointeeType();
937   }
938   return CGF.MakeAddrLValue(
939       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
940                                        CGF.ConvertTypeForMem(ElTy)),
941       BaseLV.getType(), BaseLV.getBaseInfo(),
942       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
943 }
944 
/// Wraps \p Addr in a chain of temporaries that mirrors the pointer/reference
/// levels of \p BaseTy down to \p ElTy.
///
/// One memory temporary is created per pointer/reference level; each
/// temporary's pointer is stored into the temporary of the level above it.
/// \p Addr is cast to the element type of the innermost temporary and stored
/// there, and the outermost temporary is returned. When \p BaseTy has no such
/// levels, \p Addr is cast to \p BaseLVType and returned directly with
/// alignment \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  // Tmp: temporary of the current level; TopTmp: temporary of the previous
  // level; MostTopTmp: temporary of the first (outermost) level.
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    // Link the new level into the previous one, or remember it as the
    // outermost level if it is the first.
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  // Cast target: the innermost temporary's element type if any temporary was
  // created, otherwise the caller-provided base lvalue type.
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
972 
973 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
974   const VarDecl *OrigVD = nullptr;
975   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
976     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
977     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
978       Base = TempOASE->getBase()->IgnoreParenImpCasts();
979     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
980       Base = TempASE->getBase()->IgnoreParenImpCasts();
981     DE = cast<DeclRefExpr>(Base);
982     OrigVD = cast<VarDecl>(DE->getDecl());
983   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
984     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
985     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
986       Base = TempASE->getBase()->IgnoreParenImpCasts();
987     DE = cast<DeclRefExpr>(Base);
988     OrigVD = cast<VarDecl>(DE->getDecl());
989   }
990   return OrigVD;
991 }
992 
993 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
994                                                Address PrivateAddr) {
995   const DeclRefExpr *DE;
996   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
997     BaseDecls.emplace_back(OrigVD);
998     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
999     LValue BaseLValue =
1000         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1001                     OriginalBaseLValue);
1002     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1003         BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
1004     llvm::Value *PrivatePointer =
1005         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1006             PrivateAddr.getPointer(),
1007             SharedAddresses[N].first.getAddress(CGF).getType());
1008     llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1009     return castToBase(CGF, OrigVD->getType(),
1010                       SharedAddresses[N].first.getType(),
1011                       OriginalBaseLValue.getAddress(CGF).getType(),
1012                       OriginalBaseLValue.getAlignment(), Ptr);
1013   }
1014   BaseDecls.emplace_back(
1015       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1016   return PrivateAddr;
1017 }
1018 
1019 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1020   const OMPDeclareReductionDecl *DRD =
1021       getReductionInit(ClausesData[N].ReductionOp);
1022   return DRD && DRD->getInitializer();
1023 }
1024 
1025 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1026   return CGF.EmitLoadOfPointerLValue(
1027       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1028       getThreadIDVariable()->getType()->castAs<PointerType>());
1029 }
1030 
1031 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1032   if (!CGF.HaveInsertPoint())
1033     return;
1034   // 1.2.2 OpenMP Language Terminology
1035   // Structured block - An executable statement with a single entry at the
1036   // top and a single exit at the bottom.
1037   // The point of exit cannot be a branch out of the structured block.
1038   // longjmp() and throw() must not violate the entry/exit criteria.
1039   CGF.EHStack.pushTerminate();
1040   CodeGen(CGF);
1041   CGF.EHStack.popTerminate();
1042 }
1043 
1044 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1045     CodeGenFunction &CGF) {
1046   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1047                             getThreadIDVariable()->getType(),
1048                             AlignmentSource::Decl);
1049 }
1050 
1051 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1052                                        QualType FieldTy) {
1053   auto *Field = FieldDecl::Create(
1054       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1055       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1056       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1057   Field->setAccess(AS_public);
1058   DC->addDecl(Field);
1059   return Field;
1060 }
1061 
/// Constructs the OpenMP runtime code generator for module \p CGM.
///
/// \param FirstSeparator Separator stored for use before the first part of a
///                       name built by getName().
/// \param Separator      Separator stored for use before each following part.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // The critical-name type is an array of eight 32-bit integers.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}
1072 
1073 void CGOpenMPRuntime::clear() {
1074   InternalVars.clear();
1075   // Clean non-target variable declarations possibly used only in debug info.
1076   for (const auto &Data : EmittedNonTargetVariables) {
1077     if (!Data.getValue().pointsToAliveValue())
1078       continue;
1079     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1080     if (!GV)
1081       continue;
1082     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1083       continue;
1084     GV->eraseFromParent();
1085   }
1086 }
1087 
1088 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1089   SmallString<128> Buffer;
1090   llvm::raw_svector_ostream OS(Buffer);
1091   StringRef Sep = FirstSeparator;
1092   for (StringRef Part : Parts) {
1093     OS << Sep << Part;
1094     Sep = Separator;
1095   }
1096   return std::string(OS.str());
1097 }
1098 
/// Emits the internal helper function that implements either the combiner or
/// the initializer of a user-defined reduction over type \p Ty.
///
/// \param CombinerInitializer Expression emitted (and its value ignored) as
///                            the function body; may be null.
/// \param In                  Variable mapped to the pointee of the 'in'
///                            parameter.
/// \param Out                 Variable mapped to the pointee of the 'out'
///                            parameter.
/// \param IsCombiner          Selects the "omp_combiner" name; otherwise the
///                            function is named "omp_initializer" and \p Out's
///                            own non-trivial initializer, if any, is emitted
///                            first.
/// \returns The newly created function with internal linkage.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  // Both parameters are restrict-qualified pointers to Ty; note that the
  // 'out' parameter is pushed first.
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // When optimizing, replace noinline/optnone with alwaysinline on the helper.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // Initializer only: emit Out's own non-trivial initializer into the
  // privatized 'out' storage before the initializer expression runs.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1155 
1156 void CGOpenMPRuntime::emitUserDefinedReduction(
1157     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1158   if (UDRMap.count(D) > 0)
1159     return;
1160   llvm::Function *Combiner = emitCombinerOrInitializer(
1161       CGM, D->getType(), D->getCombiner(),
1162       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1163       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1164       /*IsCombiner=*/true);
1165   llvm::Function *Initializer = nullptr;
1166   if (const Expr *Init = D->getInitializer()) {
1167     Initializer = emitCombinerOrInitializer(
1168         CGM, D->getType(),
1169         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1170                                                                      : nullptr,
1171         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1172         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1173         /*IsCombiner=*/false);
1174   }
1175   UDRMap.try_emplace(D, Combiner, Initializer);
1176   if (CGF) {
1177     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1178     Decls.second.push_back(D);
1179   }
1180 }
1181 
1182 std::pair<llvm::Function *, llvm::Function *>
1183 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1184   auto I = UDRMap.find(D);
1185   if (I != UDRMap.end())
1186     return I->second;
1187   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1188   return UDRMap.lookup(D);
1189 }
1190 
1191 namespace {
1192 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1193 // Builder if one is present.
1194 struct PushAndPopStackRAII {
1195   PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1196                       bool HasCancel)
1197       : OMPBuilder(OMPBuilder) {
1198     if (!OMPBuilder)
1199       return;
1200 
1201     // The following callback is the crucial part of clangs cleanup process.
1202     //
1203     // NOTE:
1204     // Once the OpenMPIRBuilder is used to create parallel regions (and
1205     // similar), the cancellation destination (Dest below) is determined via
1206     // IP. That means if we have variables to finalize we split the block at IP,
1207     // use the new block (=BB) as destination to build a JumpDest (via
1208     // getJumpDestInCurrentScope(BB)) which then is fed to
1209     // EmitBranchThroughCleanup. Furthermore, there will not be the need
1210     // to push & pop an FinalizationInfo object.
1211     // The FiniCB will still be needed but at the point where the
1212     // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1213     auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1214       assert(IP.getBlock()->end() == IP.getPoint() &&
1215              "Clang CG should cause non-terminated block!");
1216       CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1217       CGF.Builder.restoreIP(IP);
1218       CodeGenFunction::JumpDest Dest =
1219           CGF.getOMPCancelDestination(OMPD_parallel);
1220       CGF.EmitBranchThroughCleanup(Dest);
1221     };
1222 
1223     // TODO: Remove this once we emit parallel regions through the
1224     //       OpenMPIRBuilder as it can do this setup internally.
1225     llvm::OpenMPIRBuilder::FinalizationInfo FI(
1226         {FiniCB, OMPD_parallel, HasCancel});
1227     OMPBuilder->pushFinalizationCB(std::move(FI));
1228   }
1229   ~PushAndPopStackRAII() {
1230     if (OMPBuilder)
1231       OMPBuilder->popFinalizationCB();
1232   }
1233   llvm::OpenMPIRBuilder *OMPBuilder;
1234 };
1235 } // namespace
1236 
1237 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1238     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1239     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1240     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1241   assert(ThreadIDVar->getType()->isPointerType() &&
1242          "thread id variable must be of type kmp_int32 *");
1243   CodeGenFunction CGF(CGM, true);
1244   bool HasCancel = false;
1245   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1246     HasCancel = OPD->hasCancel();
1247   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1248     HasCancel = OPD->hasCancel();
1249   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1250     HasCancel = OPSD->hasCancel();
1251   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1252     HasCancel = OPFD->hasCancel();
1253   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1254     HasCancel = OPFD->hasCancel();
1255   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1256     HasCancel = OPFD->hasCancel();
1257   else if (const auto *OPFD =
1258                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1259     HasCancel = OPFD->hasCancel();
1260   else if (const auto *OPFD =
1261                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1262     HasCancel = OPFD->hasCancel();
1263 
1264   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1265   //       parallel region to make cancellation barriers work properly.
1266   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1267   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel);
1268   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1269                                     HasCancel, OutlinedHelperName);
1270   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1271   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1272 }
1273 
1274 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1275     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1276     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1277   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1278   return emitParallelOrTeamsOutlinedFunction(
1279       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1280 }
1281 
1282 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1283     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1284     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1285   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1286   return emitParallelOrTeamsOutlinedFunction(
1287       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1288 }
1289 
/// Emits the outlined function for a task or taskloop directive \p D.
///
/// \param ThreadIDVar   Thread id variable; must not have pointer type.
/// \param PartIDVar     Part id variable handed to the untied-task action.
/// \param TaskTVar      Variable holding a pointer that is loaded and passed
///                      as the last argument of the __kmpc_omp_task call.
/// \param Tied          Whether the task is tied; forwarded to the action.
/// \param NumberOfParts Written with the action's part count only when the
///                      task is not tied; left untouched otherwise.
/// \returns The generated captured-statement function.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Code generator handed to the untied-task action: emits a call to
  // __kmpc_omp_task(loc, thread id, pointer loaded from TaskTVar).
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Taskloop directives capture under OMPD_taskloop; everything else under
  // OMPD_task.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // The cancel flag is read from the directive classes checked below and is
  // false for any other directive.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1336 
1337 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1338                              const RecordDecl *RD, const CGRecordLayout &RL,
1339                              ArrayRef<llvm::Constant *> Data) {
1340   llvm::StructType *StructTy = RL.getLLVMType();
1341   unsigned PrevIdx = 0;
1342   ConstantInitBuilder CIBuilder(CGM);
1343   auto DI = Data.begin();
1344   for (const FieldDecl *FD : RD->fields()) {
1345     unsigned Idx = RL.getLLVMFieldNo(FD);
1346     // Fill the alignment.
1347     for (unsigned I = PrevIdx; I < Idx; ++I)
1348       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1349     PrevIdx = Idx + 1;
1350     Fields.add(*DI);
1351     ++DI;
1352   }
1353 }
1354 
1355 template <class... As>
1356 static llvm::GlobalVariable *
1357 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1358                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1359                    As &&... Args) {
1360   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1361   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1362   ConstantInitBuilder CIBuilder(CGM);
1363   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1364   buildStructValue(Fields, CGM, RD, RL, Data);
1365   return Fields.finishAndCreateGlobal(
1366       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1367       std::forward<As>(Args)...);
1368 }
1369 
1370 template <typename T>
1371 static void
1372 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1373                                          ArrayRef<llvm::Constant *> Data,
1374                                          T &Parent) {
1375   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1376   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1377   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1378   buildStructValue(Fields, CGM, RD, RL, Data);
1379   Fields.finishAndAddTo(Parent);
1380 }
1381 
1382 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1383                                              bool AtCurrentPoint) {
1384   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1385   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1386 
1387   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1388   if (AtCurrentPoint) {
1389     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1390         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1391   } else {
1392     Elem.second.ServiceInsertPt =
1393         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1394     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1395   }
1396 }
1397 
1398 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1399   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1400   if (Elem.second.ServiceInsertPt) {
1401     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1402     Elem.second.ServiceInsertPt = nullptr;
1403     Ptr->eraseFromParent();
1404   }
1405 }
1406 
1407 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1408                                                   SourceLocation Loc,
1409                                                   SmallString<128> &Buffer) {
1410   llvm::raw_svector_ostream OS(Buffer);
1411   // Build debug location
1412   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1413   OS << ";" << PLoc.getFilename() << ";";
1414   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1415     OS << FD->getQualifiedNameAsString();
1416   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1417   return OS.str();
1418 }
1419 
1420 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1421                                                  SourceLocation Loc,
1422                                                  unsigned Flags) {
1423   llvm::Constant *SrcLocStr;
1424   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1425       Loc.isInvalid()) {
1426     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
1427   } else {
1428     std::string FunctionName = "";
1429     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1430       FunctionName = FD->getQualifiedNameAsString();
1431     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1432     const char *FileName = PLoc.getFilename();
1433     unsigned Line = PLoc.getLine();
1434     unsigned Column = PLoc.getColumn();
1435     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
1436                                                 Line, Column);
1437   }
1438   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1439   return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
1440                                      Reserved2Flags);
1441 }
1442 
1443 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1444                                           SourceLocation Loc) {
1445   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1446   // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
1447   // the clang invariants used below might be broken.
1448   if (CGM.getLangOpts().OpenMPIRBuilder) {
1449     SmallString<128> Buffer;
1450     OMPBuilder.updateToLocation(CGF.Builder.saveIP());
1451     auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1452         getIdentStringFromSourceLocation(CGF, Loc, Buffer));
1453     return OMPBuilder.getOrCreateThreadID(
1454         OMPBuilder.getOrCreateIdent(SrcLocStr));
1455   }
1456 
1457   llvm::Value *ThreadID = nullptr;
1458   // Check whether we've already cached a load of the thread id in this
1459   // function.
1460   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1461   if (I != OpenMPLocThreadIDMap.end()) {
1462     ThreadID = I->second.ThreadID;
1463     if (ThreadID != nullptr)
1464       return ThreadID;
1465   }
1466   // If exceptions are enabled, do not use parameter to avoid possible crash.
1467   if (auto *OMPRegionInfo =
1468           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1469     if (OMPRegionInfo->getThreadIDVariable()) {
1470       // Check if this an outlined function with thread id passed as argument.
1471       LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1472       llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1473       if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1474           !CGF.getLangOpts().CXXExceptions ||
1475           CGF.Builder.GetInsertBlock() == TopBlock ||
1476           !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1477           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1478               TopBlock ||
1479           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1480               CGF.Builder.GetInsertBlock()) {
1481         ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1482         // If value loaded in entry block, cache it and use it everywhere in
1483         // function.
1484         if (CGF.Builder.GetInsertBlock() == TopBlock) {
1485           auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1486           Elem.second.ThreadID = ThreadID;
1487         }
1488         return ThreadID;
1489       }
1490     }
1491   }
1492 
1493   // This is not an outlined function region - need to call __kmpc_int32
1494   // kmpc_global_thread_num(ident_t *loc).
1495   // Generate thread id value and cache this value for use across the
1496   // function.
1497   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1498   if (!Elem.second.ServiceInsertPt)
1499     setLocThreadIdInsertPt(CGF);
1500   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1501   CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1502   llvm::CallInst *Call = CGF.Builder.CreateCall(
1503       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1504                                             OMPRTL___kmpc_global_thread_num),
1505       emitUpdateLocation(CGF, Loc));
1506   Call->setCallingConv(CGF.getRuntimeCC());
1507   Elem.second.ThreadID = Call;
1508   return Call;
1509 }
1510 
1511 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1512   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1513   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1514     clearLocThreadIdInsertPt(CGF);
1515     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1516   }
1517   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1518     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1519       UDRMap.erase(D);
1520     FunctionUDRMap.erase(CGF.CurFn);
1521   }
1522   auto I = FunctionUDMMap.find(CGF.CurFn);
1523   if (I != FunctionUDMMap.end()) {
1524     for(const auto *D : I->second)
1525       UDMMap.erase(D);
1526     FunctionUDMMap.erase(I);
1527   }
1528   LastprivateConditionalToTypes.erase(CGF.CurFn);
1529   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1530 }
1531 
1532 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1533   return OMPBuilder.IdentPtr;
1534 }
1535 
1536 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1537   if (!Kmpc_MicroTy) {
1538     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1539     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1540                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1541     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1542   }
1543   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1544 }
1545 
1546 llvm::FunctionCallee
1547 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1548   assert((IVSize == 32 || IVSize == 64) &&
1549          "IV size is not compatible with the omp runtime");
1550   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1551                                             : "__kmpc_for_static_init_4u")
1552                                 : (IVSigned ? "__kmpc_for_static_init_8"
1553                                             : "__kmpc_for_static_init_8u");
1554   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1555   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1556   llvm::Type *TypeParams[] = {
1557     getIdentTyPointerTy(),                     // loc
1558     CGM.Int32Ty,                               // tid
1559     CGM.Int32Ty,                               // schedtype
1560     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1561     PtrTy,                                     // p_lower
1562     PtrTy,                                     // p_upper
1563     PtrTy,                                     // p_stride
1564     ITy,                                       // incr
1565     ITy                                        // chunk
1566   };
1567   auto *FnTy =
1568       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1569   return CGM.CreateRuntimeFunction(FnTy, Name);
1570 }
1571 
1572 llvm::FunctionCallee
1573 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1574   assert((IVSize == 32 || IVSize == 64) &&
1575          "IV size is not compatible with the omp runtime");
1576   StringRef Name =
1577       IVSize == 32
1578           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1579           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1580   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1581   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1582                                CGM.Int32Ty,           // tid
1583                                CGM.Int32Ty,           // schedtype
1584                                ITy,                   // lower
1585                                ITy,                   // upper
1586                                ITy,                   // stride
1587                                ITy                    // chunk
1588   };
1589   auto *FnTy =
1590       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1591   return CGM.CreateRuntimeFunction(FnTy, Name);
1592 }
1593 
1594 llvm::FunctionCallee
1595 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1596   assert((IVSize == 32 || IVSize == 64) &&
1597          "IV size is not compatible with the omp runtime");
1598   StringRef Name =
1599       IVSize == 32
1600           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1601           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1602   llvm::Type *TypeParams[] = {
1603       getIdentTyPointerTy(), // loc
1604       CGM.Int32Ty,           // tid
1605   };
1606   auto *FnTy =
1607       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1608   return CGM.CreateRuntimeFunction(FnTy, Name);
1609 }
1610 
1611 llvm::FunctionCallee
1612 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1613   assert((IVSize == 32 || IVSize == 64) &&
1614          "IV size is not compatible with the omp runtime");
1615   StringRef Name =
1616       IVSize == 32
1617           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1618           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1619   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1620   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1621   llvm::Type *TypeParams[] = {
1622     getIdentTyPointerTy(),                     // loc
1623     CGM.Int32Ty,                               // tid
1624     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1625     PtrTy,                                     // p_lower
1626     PtrTy,                                     // p_upper
1627     PtrTy                                      // p_stride
1628   };
1629   auto *FnTy =
1630       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1631   return CGM.CreateRuntimeFunction(FnTy, Name);
1632 }
1633 
1634 /// Obtain information that uniquely identifies a target entry. This
1635 /// consists of the file and device IDs as well as line number associated with
1636 /// the relevant entry source location.
1637 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1638                                      unsigned &DeviceID, unsigned &FileID,
1639                                      unsigned &LineNum) {
1640   SourceManager &SM = C.getSourceManager();
1641 
1642   // The loc should be always valid and have a file ID (the user cannot use
1643   // #pragma directives in macros)
1644 
1645   assert(Loc.isValid() && "Source location is expected to be always valid.");
1646 
1647   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1648   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1649 
1650   llvm::sys::fs::UniqueID ID;
1651   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1652     SM.getDiagnostics().Report(diag::err_cannot_open_file)
1653         << PLoc.getFilename() << EC.message();
1654 
1655   DeviceID = ID.getDevice();
1656   FileID = ID.getFile();
1657   LineNum = PLoc.getLine();
1658 }
1659 
1660 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1661   if (CGM.getLangOpts().OpenMPSimd)
1662     return Address::invalid();
1663   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1664       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1665   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
1666               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1667                HasRequiresUnifiedSharedMemory))) {
1668     SmallString<64> PtrName;
1669     {
1670       llvm::raw_svector_ostream OS(PtrName);
1671       OS << CGM.getMangledName(GlobalDecl(VD));
1672       if (!VD->isExternallyVisible()) {
1673         unsigned DeviceID, FileID, Line;
1674         getTargetEntryUniqueInfo(CGM.getContext(),
1675                                  VD->getCanonicalDecl()->getBeginLoc(),
1676                                  DeviceID, FileID, Line);
1677         OS << llvm::format("_%x", FileID);
1678       }
1679       OS << "_decl_tgt_ref_ptr";
1680     }
1681     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
1682     if (!Ptr) {
1683       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
1684       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
1685                                         PtrName);
1686 
1687       auto *GV = cast<llvm::GlobalVariable>(Ptr);
1688       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
1689 
1690       if (!CGM.getLangOpts().OpenMPIsDevice)
1691         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
1692       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
1693     }
1694     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
1695   }
1696   return Address::invalid();
1697 }
1698 
1699 llvm::Constant *
1700 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1701   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1702          !CGM.getContext().getTargetInfo().isTLSSupported());
1703   // Lookup the entry, lazily creating it if necessary.
1704   std::string Suffix = getName({"cache", ""});
1705   return getOrCreateInternalVariable(
1706       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1707 }
1708 
1709 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1710                                                 const VarDecl *VD,
1711                                                 Address VDAddr,
1712                                                 SourceLocation Loc) {
1713   if (CGM.getLangOpts().OpenMPUseTLS &&
1714       CGM.getContext().getTargetInfo().isTLSSupported())
1715     return VDAddr;
1716 
1717   llvm::Type *VarTy = VDAddr.getElementType();
1718   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1719                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1720                                                        CGM.Int8PtrTy),
1721                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1722                          getOrCreateThreadPrivateCache(VD)};
1723   return Address(CGF.EmitRuntimeCall(
1724                      OMPBuilder.getOrCreateRuntimeFunction(
1725                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1726                      Args),
1727                  VDAddr.getAlignment());
1728 }
1729 
1730 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1731     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1732     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1733   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1734   // library.
1735   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1736   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1737                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1738                       OMPLoc);
1739   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1740   // to register constructor/destructor for variable.
1741   llvm::Value *Args[] = {
1742       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1743       Ctor, CopyCtor, Dtor};
1744   CGF.EmitRuntimeCall(
1745       OMPBuilder.getOrCreateRuntimeFunction(
1746           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1747       Args);
1748 }
1749 
1750 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1751     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1752     bool PerformInit, CodeGenFunction *CGF) {
1753   if (CGM.getLangOpts().OpenMPUseTLS &&
1754       CGM.getContext().getTargetInfo().isTLSSupported())
1755     return nullptr;
1756 
1757   VD = VD->getDefinition(CGM.getContext());
1758   if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1759     QualType ASTTy = VD->getType();
1760 
1761     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1762     const Expr *Init = VD->getAnyInitializer();
1763     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1764       // Generate function that re-emits the declaration's initializer into the
1765       // threadprivate copy of the variable VD
1766       CodeGenFunction CtorCGF(CGM);
1767       FunctionArgList Args;
1768       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1769                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1770                             ImplicitParamDecl::Other);
1771       Args.push_back(&Dst);
1772 
1773       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1774           CGM.getContext().VoidPtrTy, Args);
1775       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1776       std::string Name = getName({"__kmpc_global_ctor_", ""});
1777       llvm::Function *Fn =
1778           CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1779       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1780                             Args, Loc, Loc);
1781       llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1782           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1783           CGM.getContext().VoidPtrTy, Dst.getLocation());
1784       Address Arg = Address(ArgVal, VDAddr.getAlignment());
1785       Arg = CtorCGF.Builder.CreateElementBitCast(
1786           Arg, CtorCGF.ConvertTypeForMem(ASTTy));
1787       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1788                                /*IsInitializer=*/true);
1789       ArgVal = CtorCGF.EmitLoadOfScalar(
1790           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1791           CGM.getContext().VoidPtrTy, Dst.getLocation());
1792       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1793       CtorCGF.FinishFunction();
1794       Ctor = Fn;
1795     }
1796     if (VD->getType().isDestructedType() != QualType::DK_none) {
1797       // Generate function that emits destructor call for the threadprivate copy
1798       // of the variable VD
1799       CodeGenFunction DtorCGF(CGM);
1800       FunctionArgList Args;
1801       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1802                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1803                             ImplicitParamDecl::Other);
1804       Args.push_back(&Dst);
1805 
1806       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1807           CGM.getContext().VoidTy, Args);
1808       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1809       std::string Name = getName({"__kmpc_global_dtor_", ""});
1810       llvm::Function *Fn =
1811           CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1812       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1813       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1814                             Loc, Loc);
1815       // Create a scope with an artificial location for the body of this function.
1816       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1817       llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1818           DtorCGF.GetAddrOfLocalVar(&Dst),
1819           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1820       DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
1821                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1822                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1823       DtorCGF.FinishFunction();
1824       Dtor = Fn;
1825     }
1826     // Do not emit init function if it is not required.
1827     if (!Ctor && !Dtor)
1828       return nullptr;
1829 
1830     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1831     auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1832                                                /*isVarArg=*/false)
1833                            ->getPointerTo();
1834     // Copying constructor for the threadprivate variable.
1835     // Must be NULL - reserved by runtime, but currently it requires that this
1836     // parameter is always NULL. Otherwise it fires assertion.
1837     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1838     if (Ctor == nullptr) {
1839       auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1840                                              /*isVarArg=*/false)
1841                          ->getPointerTo();
1842       Ctor = llvm::Constant::getNullValue(CtorTy);
1843     }
1844     if (Dtor == nullptr) {
1845       auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1846                                              /*isVarArg=*/false)
1847                          ->getPointerTo();
1848       Dtor = llvm::Constant::getNullValue(DtorTy);
1849     }
1850     if (!CGF) {
1851       auto *InitFunctionTy =
1852           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1853       std::string Name = getName({"__omp_threadprivate_init_", ""});
1854       llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1855           InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1856       CodeGenFunction InitCGF(CGM);
1857       FunctionArgList ArgList;
1858       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1859                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
1860                             Loc, Loc);
1861       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1862       InitCGF.FinishFunction();
1863       return InitFunction;
1864     }
1865     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1866   }
1867   return nullptr;
1868 }
1869 
1870 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
1871                                                      llvm::GlobalVariable *Addr,
1872                                                      bool PerformInit) {
1873   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
1874       !CGM.getLangOpts().OpenMPIsDevice)
1875     return false;
1876   Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1877       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1878   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
1879       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1880        HasRequiresUnifiedSharedMemory))
1881     return CGM.getLangOpts().OpenMPIsDevice;
1882   VD = VD->getDefinition(CGM.getContext());
1883   assert(VD && "Unknown VarDecl");
1884 
1885   if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
1886     return CGM.getLangOpts().OpenMPIsDevice;
1887 
1888   QualType ASTTy = VD->getType();
1889   SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
1890 
1891   // Produce the unique prefix to identify the new target regions. We use
1892   // the source location of the variable declaration which we know to not
1893   // conflict with any target region.
1894   unsigned DeviceID;
1895   unsigned FileID;
1896   unsigned Line;
1897   getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
1898   SmallString<128> Buffer, Out;
1899   {
1900     llvm::raw_svector_ostream OS(Buffer);
1901     OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
1902        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
1903   }
1904 
1905   const Expr *Init = VD->getAnyInitializer();
1906   if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1907     llvm::Constant *Ctor;
1908     llvm::Constant *ID;
1909     if (CGM.getLangOpts().OpenMPIsDevice) {
1910       // Generate function that re-emits the declaration's initializer into
1911       // the threadprivate copy of the variable VD
1912       CodeGenFunction CtorCGF(CGM);
1913 
1914       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1915       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1916       llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1917           FTy, Twine(Buffer, "_ctor"), FI, Loc);
1918       auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
1919       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1920                             FunctionArgList(), Loc, Loc);
1921       auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
1922       CtorCGF.EmitAnyExprToMem(Init,
1923                                Address(Addr, CGM.getContext().getDeclAlign(VD)),
1924                                Init->getType().getQualifiers(),
1925                                /*IsInitializer=*/true);
1926       CtorCGF.FinishFunction();
1927       Ctor = Fn;
1928       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
1929       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
1930     } else {
1931       Ctor = new llvm::GlobalVariable(
1932           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1933           llvm::GlobalValue::PrivateLinkage,
1934           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
1935       ID = Ctor;
1936     }
1937 
1938     // Register the information for the entry associated with the constructor.
1939     Out.clear();
1940     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
1941         DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
1942         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
1943   }
1944   if (VD->getType().isDestructedType() != QualType::DK_none) {
1945     llvm::Constant *Dtor;
1946     llvm::Constant *ID;
1947     if (CGM.getLangOpts().OpenMPIsDevice) {
1948       // Generate function that emits destructor call for the threadprivate
1949       // copy of the variable VD
1950       CodeGenFunction DtorCGF(CGM);
1951 
1952       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1953       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1954       llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1955           FTy, Twine(Buffer, "_dtor"), FI, Loc);
1956       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1957       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1958                             FunctionArgList(), Loc, Loc);
1959       // Create a scope with an artificial location for the body of this
1960       // function.
1961       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1962       DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
1963                           ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1964                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1965       DtorCGF.FinishFunction();
1966       Dtor = Fn;
1967       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
1968       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
1969     } else {
1970       Dtor = new llvm::GlobalVariable(
1971           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1972           llvm::GlobalValue::PrivateLinkage,
1973           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
1974       ID = Dtor;
1975     }
1976     // Register the information for the entry associated with the destructor.
1977     Out.clear();
1978     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
1979         DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
1980         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
1981   }
1982   return CGM.getLangOpts().OpenMPIsDevice;
1983 }
1984 
1985 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1986                                                           QualType VarType,
1987                                                           StringRef Name) {
1988   std::string Suffix = getName({"artificial", ""});
1989   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1990   llvm::Value *GAddr =
1991       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
1992   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1993       CGM.getTarget().isTLSSupported()) {
1994     cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
1995     return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
1996   }
1997   std::string CacheSuffix = getName({"cache", ""});
1998   llvm::Value *Args[] = {
1999       emitUpdateLocation(CGF, SourceLocation()),
2000       getThreadID(CGF, SourceLocation()),
2001       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2002       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2003                                 /*isSigned=*/false),
2004       getOrCreateInternalVariable(
2005           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2006   return Address(
2007       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2008           CGF.EmitRuntimeCall(
2009               OMPBuilder.getOrCreateRuntimeFunction(
2010                   CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
2011               Args),
2012           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2013       CGM.getContext().getTypeAlignInChars(VarType));
2014 }
2015 
2016 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2017                                    const RegionCodeGenTy &ThenGen,
2018                                    const RegionCodeGenTy &ElseGen) {
2019   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2020 
2021   // If the condition constant folds and can be elided, try to avoid emitting
2022   // the condition and the dead arm of the if/else.
2023   bool CondConstant;
2024   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2025     if (CondConstant)
2026       ThenGen(CGF);
2027     else
2028       ElseGen(CGF);
2029     return;
2030   }
2031 
2032   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2033   // emit the conditional branch.
2034   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2035   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2036   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2037   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2038 
2039   // Emit the 'then' code.
2040   CGF.EmitBlock(ThenBlock);
2041   ThenGen(CGF);
2042   CGF.EmitBranch(ContBlock);
2043   // Emit the 'else' code if present.
2044   // There is no need to emit line number for unconditional branch.
2045   (void)ApplyDebugLocation::CreateEmpty(CGF);
2046   CGF.EmitBlock(ElseBlock);
2047   ElseGen(CGF);
2048   // There is no need to emit line number for unconditional branch.
2049   (void)ApplyDebugLocation::CreateEmpty(CGF);
2050   CGF.EmitBranch(ContBlock);
2051   // Emit the continuation block for code after the if.
2052   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2053 }
2054 
2055 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2056                                        llvm::Function *OutlinedFn,
2057                                        ArrayRef<llvm::Value *> CapturedVars,
2058                                        const Expr *IfCond) {
2059   if (!CGF.HaveInsertPoint())
2060     return;
2061   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2062   auto &M = CGM.getModule();
2063   auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2064                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2065     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2066     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2067     llvm::Value *Args[] = {
2068         RTLoc,
2069         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2070         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2071     llvm::SmallVector<llvm::Value *, 16> RealArgs;
2072     RealArgs.append(std::begin(Args), std::end(Args));
2073     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2074 
2075     llvm::FunctionCallee RTLFn =
2076         OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2077     CGF.EmitRuntimeCall(RTLFn, RealArgs);
2078   };
2079   auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2080                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2081     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2082     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2083     // Build calls:
2084     // __kmpc_serialized_parallel(&Loc, GTid);
2085     llvm::Value *Args[] = {RTLoc, ThreadID};
2086     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2087                             M, OMPRTL___kmpc_serialized_parallel),
2088                         Args);
2089 
2090     // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2091     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2092     Address ZeroAddrBound =
2093         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2094                                          /*Name=*/".bound.zero.addr");
2095     CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
2096     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2097     // ThreadId for serialized parallels is 0.
2098     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2099     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2100     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2101     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2102 
2103     // __kmpc_end_serialized_parallel(&Loc, GTid);
2104     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2105     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2106                             M, OMPRTL___kmpc_end_serialized_parallel),
2107                         EndArgs);
2108   };
2109   if (IfCond) {
2110     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2111   } else {
2112     RegionCodeGenTy ThenRCG(ThenGen);
2113     ThenRCG(CGF);
2114   }
2115 }
2116 
2117 // If we're inside an (outlined) parallel region, use the region info's
2118 // thread-ID variable (it is passed in a first argument of the outlined function
2119 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2120 // regular serial code region, get thread ID by calling kmp_int32
2121 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2122 // return the address of that temp.
2123 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2124                                              SourceLocation Loc) {
2125   if (auto *OMPRegionInfo =
2126           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2127     if (OMPRegionInfo->getThreadIDVariable())
2128       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2129 
2130   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2131   QualType Int32Ty =
2132       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2133   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2134   CGF.EmitStoreOfScalar(ThreadID,
2135                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2136 
2137   return ThreadIDTemp;
2138 }
2139 
2140 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2141     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2142   SmallString<256> Buffer;
2143   llvm::raw_svector_ostream Out(Buffer);
2144   Out << Name;
2145   StringRef RuntimeName = Out.str();
2146   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2147   if (Elem.second) {
2148     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2149            "OMP internal variable has different type than requested");
2150     return &*Elem.second;
2151   }
2152 
2153   return Elem.second = new llvm::GlobalVariable(
2154              CGM.getModule(), Ty, /*IsConstant*/ false,
2155              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2156              Elem.first(), /*InsertBefore=*/nullptr,
2157              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2158 }
2159 
2160 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2161   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2162   std::string Name = getName({Prefix, "var"});
2163   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2164 }
2165 
2166 namespace {
2167 /// Common pre(post)-action for different OpenMP constructs.
2168 class CommonActionTy final : public PrePostActionTy {
2169   llvm::FunctionCallee EnterCallee;
2170   ArrayRef<llvm::Value *> EnterArgs;
2171   llvm::FunctionCallee ExitCallee;
2172   ArrayRef<llvm::Value *> ExitArgs;
2173   bool Conditional;
2174   llvm::BasicBlock *ContBlock = nullptr;
2175 
2176 public:
2177   CommonActionTy(llvm::FunctionCallee EnterCallee,
2178                  ArrayRef<llvm::Value *> EnterArgs,
2179                  llvm::FunctionCallee ExitCallee,
2180                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2181       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2182         ExitArgs(ExitArgs), Conditional(Conditional) {}
2183   void Enter(CodeGenFunction &CGF) override {
2184     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2185     if (Conditional) {
2186       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2187       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2188       ContBlock = CGF.createBasicBlock("omp_if.end");
2189       // Generate the branch (If-stmt)
2190       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2191       CGF.EmitBlock(ThenBlock);
2192     }
2193   }
2194   void Done(CodeGenFunction &CGF) {
2195     // Emit the rest of blocks/branches
2196     CGF.EmitBranch(ContBlock);
2197     CGF.EmitBlock(ContBlock, true);
2198   }
2199   void Exit(CodeGenFunction &CGF) override {
2200     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2201   }
2202 };
2203 } // anonymous namespace
2204 
2205 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2206                                          StringRef CriticalName,
2207                                          const RegionCodeGenTy &CriticalOpGen,
2208                                          SourceLocation Loc, const Expr *Hint) {
2209   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2210   // CriticalOpGen();
2211   // __kmpc_end_critical(ident_t *, gtid, Lock);
2212   // Prepare arguments and build a call to __kmpc_critical
2213   if (!CGF.HaveInsertPoint())
2214     return;
2215   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2216                          getCriticalRegionLock(CriticalName)};
2217   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2218                                                 std::end(Args));
2219   if (Hint) {
2220     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2221         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2222   }
2223   CommonActionTy Action(
2224       OMPBuilder.getOrCreateRuntimeFunction(
2225           CGM.getModule(),
2226           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2227       EnterArgs,
2228       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2229                                             OMPRTL___kmpc_end_critical),
2230       Args);
2231   CriticalOpGen.setAction(Action);
2232   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2233 }
2234 
2235 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2236                                        const RegionCodeGenTy &MasterOpGen,
2237                                        SourceLocation Loc) {
2238   if (!CGF.HaveInsertPoint())
2239     return;
2240   // if(__kmpc_master(ident_t *, gtid)) {
2241   //   MasterOpGen();
2242   //   __kmpc_end_master(ident_t *, gtid);
2243   // }
2244   // Prepare arguments and build a call to __kmpc_master
2245   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2246   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2247                             CGM.getModule(), OMPRTL___kmpc_master),
2248                         Args,
2249                         OMPBuilder.getOrCreateRuntimeFunction(
2250                             CGM.getModule(), OMPRTL___kmpc_end_master),
2251                         Args,
2252                         /*Conditional=*/true);
2253   MasterOpGen.setAction(Action);
2254   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2255   Action.Done(CGF);
2256 }
2257 
2258 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2259                                         SourceLocation Loc) {
2260   if (!CGF.HaveInsertPoint())
2261     return;
2262   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2263     OMPBuilder.createTaskyield(CGF.Builder);
2264   } else {
2265     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2266     llvm::Value *Args[] = {
2267         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2268         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2269     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2270                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2271                         Args);
2272   }
2273 
2274   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2275     Region->emitUntiedSwitch(CGF);
2276 }
2277 
2278 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2279                                           const RegionCodeGenTy &TaskgroupOpGen,
2280                                           SourceLocation Loc) {
2281   if (!CGF.HaveInsertPoint())
2282     return;
2283   // __kmpc_taskgroup(ident_t *, gtid);
2284   // TaskgroupOpGen();
2285   // __kmpc_end_taskgroup(ident_t *, gtid);
2286   // Prepare arguments and build a call to __kmpc_taskgroup
2287   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2288   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2289                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2290                         Args,
2291                         OMPBuilder.getOrCreateRuntimeFunction(
2292                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2293                         Args);
2294   TaskgroupOpGen.setAction(Action);
2295   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2296 }
2297 
2298 /// Given an array of pointers to variables, project the address of a
2299 /// given variable.
2300 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2301                                       unsigned Index, const VarDecl *Var) {
2302   // Pull out the pointer to the variable.
2303   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2304   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2305 
2306   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2307   Addr = CGF.Builder.CreateElementBitCast(
2308       Addr, CGF.ConvertTypeForMem(Var->getType()));
2309   return Addr;
2310 }
2311 
2312 static llvm::Value *emitCopyprivateCopyFunction(
2313     CodeGenModule &CGM, llvm::Type *ArgsType,
2314     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2315     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2316     SourceLocation Loc) {
2317   ASTContext &C = CGM.getContext();
2318   // void copy_func(void *LHSArg, void *RHSArg);
2319   FunctionArgList Args;
2320   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2321                            ImplicitParamDecl::Other);
2322   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2323                            ImplicitParamDecl::Other);
2324   Args.push_back(&LHSArg);
2325   Args.push_back(&RHSArg);
2326   const auto &CGFI =
2327       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2328   std::string Name =
2329       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2330   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2331                                     llvm::GlobalValue::InternalLinkage, Name,
2332                                     &CGM.getModule());
2333   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2334   Fn->setDoesNotRecurse();
2335   CodeGenFunction CGF(CGM);
2336   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2337   // Dest = (void*[n])(LHSArg);
2338   // Src = (void*[n])(RHSArg);
2339   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2340       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2341       ArgsType), CGF.getPointerAlign());
2342   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2343       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2344       ArgsType), CGF.getPointerAlign());
2345   // *(Type0*)Dst[0] = *(Type0*)Src[0];
2346   // *(Type1*)Dst[1] = *(Type1*)Src[1];
2347   // ...
2348   // *(Typen*)Dst[n] = *(Typen*)Src[n];
2349   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2350     const auto *DestVar =
2351         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2352     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2353 
2354     const auto *SrcVar =
2355         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2356     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2357 
2358     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2359     QualType Type = VD->getType();
2360     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2361   }
2362   CGF.FinishFunction();
2363   return Fn;
2364 }
2365 
2366 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2367                                        const RegionCodeGenTy &SingleOpGen,
2368                                        SourceLocation Loc,
2369                                        ArrayRef<const Expr *> CopyprivateVars,
2370                                        ArrayRef<const Expr *> SrcExprs,
2371                                        ArrayRef<const Expr *> DstExprs,
2372                                        ArrayRef<const Expr *> AssignmentOps) {
2373   if (!CGF.HaveInsertPoint())
2374     return;
2375   assert(CopyprivateVars.size() == SrcExprs.size() &&
2376          CopyprivateVars.size() == DstExprs.size() &&
2377          CopyprivateVars.size() == AssignmentOps.size());
2378   ASTContext &C = CGM.getContext();
2379   // int32 did_it = 0;
2380   // if(__kmpc_single(ident_t *, gtid)) {
2381   //   SingleOpGen();
2382   //   __kmpc_end_single(ident_t *, gtid);
2383   //   did_it = 1;
2384   // }
2385   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2386   // <copy_func>, did_it);
2387 
2388   Address DidIt = Address::invalid();
2389   if (!CopyprivateVars.empty()) {
2390     // int32 did_it = 0;
2391     QualType KmpInt32Ty =
2392         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2393     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2394     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2395   }
2396   // Prepare arguments and build a call to __kmpc_single
2397   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2398   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2399                             CGM.getModule(), OMPRTL___kmpc_single),
2400                         Args,
2401                         OMPBuilder.getOrCreateRuntimeFunction(
2402                             CGM.getModule(), OMPRTL___kmpc_end_single),
2403                         Args,
2404                         /*Conditional=*/true);
2405   SingleOpGen.setAction(Action);
2406   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2407   if (DidIt.isValid()) {
2408     // did_it = 1;
2409     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2410   }
2411   Action.Done(CGF);
2412   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2413   // <copy_func>, did_it);
2414   if (DidIt.isValid()) {
2415     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2416     QualType CopyprivateArrayTy = C.getConstantArrayType(
2417         C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2418         /*IndexTypeQuals=*/0);
2419     // Create a list of all private variables for copyprivate.
2420     Address CopyprivateList =
2421         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2422     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2423       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2424       CGF.Builder.CreateStore(
2425           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2426               CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2427               CGF.VoidPtrTy),
2428           Elem);
2429     }
2430     // Build function that copies private values from single region to all other
2431     // threads in the corresponding parallel region.
2432     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2433         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
2434         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
2435     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2436     Address CL =
2437       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
2438                                                       CGF.VoidPtrTy);
2439     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2440     llvm::Value *Args[] = {
2441         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2442         getThreadID(CGF, Loc),        // i32 <gtid>
2443         BufSize,                      // size_t <buf_size>
2444         CL.getPointer(),              // void *<copyprivate list>
2445         CpyFn,                        // void (*) (void *, void *) <copy_func>
2446         DidItVal                      // i32 did_it
2447     };
2448     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2449                             CGM.getModule(), OMPRTL___kmpc_copyprivate),
2450                         Args);
2451   }
2452 }
2453 
2454 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2455                                         const RegionCodeGenTy &OrderedOpGen,
2456                                         SourceLocation Loc, bool IsThreads) {
2457   if (!CGF.HaveInsertPoint())
2458     return;
2459   // __kmpc_ordered(ident_t *, gtid);
2460   // OrderedOpGen();
2461   // __kmpc_end_ordered(ident_t *, gtid);
2462   // Prepare arguments and build a call to __kmpc_ordered
2463   if (IsThreads) {
2464     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2465     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2466                               CGM.getModule(), OMPRTL___kmpc_ordered),
2467                           Args,
2468                           OMPBuilder.getOrCreateRuntimeFunction(
2469                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2470                           Args);
2471     OrderedOpGen.setAction(Action);
2472     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2473     return;
2474   }
2475   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2476 }
2477 
2478 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2479   unsigned Flags;
2480   if (Kind == OMPD_for)
2481     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2482   else if (Kind == OMPD_sections)
2483     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2484   else if (Kind == OMPD_single)
2485     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2486   else if (Kind == OMPD_barrier)
2487     Flags = OMP_IDENT_BARRIER_EXPL;
2488   else
2489     Flags = OMP_IDENT_BARRIER_IMPL;
2490   return Flags;
2491 }
2492 
2493 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2494     CodeGenFunction &CGF, const OMPLoopDirective &S,
2495     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2496   // Check if the loop directive is actually a doacross loop directive. In this
2497   // case choose static, 1 schedule.
2498   if (llvm::any_of(
2499           S.getClausesOfKind<OMPOrderedClause>(),
2500           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2501     ScheduleKind = OMPC_SCHEDULE_static;
2502     // Chunk size is 1 in this case.
2503     llvm::APInt ChunkSize(32, 1);
2504     ChunkExpr = IntegerLiteral::Create(
2505         CGF.getContext(), ChunkSize,
2506         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2507         SourceLocation());
2508   }
2509 }
2510 
2511 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2512                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2513                                       bool ForceSimpleCall) {
2514   // Check if we should use the OMPBuilder
2515   auto *OMPRegionInfo =
2516       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2517   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2518     CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2519         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2520     return;
2521   }
2522 
2523   if (!CGF.HaveInsertPoint())
2524     return;
2525   // Build call __kmpc_cancel_barrier(loc, thread_id);
2526   // Build call __kmpc_barrier(loc, thread_id);
2527   unsigned Flags = getDefaultFlagsForBarriers(Kind);
2528   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2529   // thread_id);
2530   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2531                          getThreadID(CGF, Loc)};
2532   if (OMPRegionInfo) {
2533     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2534       llvm::Value *Result = CGF.EmitRuntimeCall(
2535           OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2536                                                 OMPRTL___kmpc_cancel_barrier),
2537           Args);
2538       if (EmitChecks) {
2539         // if (__kmpc_cancel_barrier()) {
2540         //   exit from construct;
2541         // }
2542         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2543         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2544         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2545         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2546         CGF.EmitBlock(ExitBB);
2547         //   exit from construct;
2548         CodeGenFunction::JumpDest CancelDestination =
2549             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2550         CGF.EmitBranchThroughCleanup(CancelDestination);
2551         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2552       }
2553       return;
2554     }
2555   }
2556   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2557                           CGM.getModule(), OMPRTL___kmpc_barrier),
2558                       Args);
2559 }
2560 
2561 /// Map the OpenMP loop schedule to the runtime enumeration.
2562 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2563                                           bool Chunked, bool Ordered) {
2564   switch (ScheduleKind) {
2565   case OMPC_SCHEDULE_static:
2566     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2567                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2568   case OMPC_SCHEDULE_dynamic:
2569     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2570   case OMPC_SCHEDULE_guided:
2571     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2572   case OMPC_SCHEDULE_runtime:
2573     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2574   case OMPC_SCHEDULE_auto:
2575     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2576   case OMPC_SCHEDULE_unknown:
2577     assert(!Chunked && "chunk was specified but schedule kind not known");
2578     return Ordered ? OMP_ord_static : OMP_sch_static;
2579   }
2580   llvm_unreachable("Unexpected runtime schedule");
2581 }
2582 
2583 /// Map the OpenMP distribute schedule to the runtime enumeration.
2584 static OpenMPSchedType
2585 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2586   // only static is allowed for dist_schedule
2587   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2588 }
2589 
2590 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2591                                          bool Chunked) const {
2592   OpenMPSchedType Schedule =
2593       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2594   return Schedule == OMP_sch_static;
2595 }
2596 
2597 bool CGOpenMPRuntime::isStaticNonchunked(
2598     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2599   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2600   return Schedule == OMP_dist_sch_static;
2601 }
2602 
2603 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2604                                       bool Chunked) const {
2605   OpenMPSchedType Schedule =
2606       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2607   return Schedule == OMP_sch_static_chunked;
2608 }
2609 
2610 bool CGOpenMPRuntime::isStaticChunked(
2611     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2612   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2613   return Schedule == OMP_dist_sch_static_chunked;
2614 }
2615 
2616 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2617   OpenMPSchedType Schedule =
2618       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2619   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2620   return Schedule != OMP_sch_static;
2621 }
2622 
2623 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2624                                   OpenMPScheduleClauseModifier M1,
2625                                   OpenMPScheduleClauseModifier M2) {
2626   int Modifier = 0;
2627   switch (M1) {
2628   case OMPC_SCHEDULE_MODIFIER_monotonic:
2629     Modifier = OMP_sch_modifier_monotonic;
2630     break;
2631   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2632     Modifier = OMP_sch_modifier_nonmonotonic;
2633     break;
2634   case OMPC_SCHEDULE_MODIFIER_simd:
2635     if (Schedule == OMP_sch_static_chunked)
2636       Schedule = OMP_sch_static_balanced_chunked;
2637     break;
2638   case OMPC_SCHEDULE_MODIFIER_last:
2639   case OMPC_SCHEDULE_MODIFIER_unknown:
2640     break;
2641   }
2642   switch (M2) {
2643   case OMPC_SCHEDULE_MODIFIER_monotonic:
2644     Modifier = OMP_sch_modifier_monotonic;
2645     break;
2646   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2647     Modifier = OMP_sch_modifier_nonmonotonic;
2648     break;
2649   case OMPC_SCHEDULE_MODIFIER_simd:
2650     if (Schedule == OMP_sch_static_chunked)
2651       Schedule = OMP_sch_static_balanced_chunked;
2652     break;
2653   case OMPC_SCHEDULE_MODIFIER_last:
2654   case OMPC_SCHEDULE_MODIFIER_unknown:
2655     break;
2656   }
2657   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2658   // If the static schedule kind is specified or if the ordered clause is
2659   // specified, and if the nonmonotonic modifier is not specified, the effect is
2660   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2661   // modifier is specified, the effect is as if the nonmonotonic modifier is
2662   // specified.
2663   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2664     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2665           Schedule == OMP_sch_static_balanced_chunked ||
2666           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2667           Schedule == OMP_dist_sch_static_chunked ||
2668           Schedule == OMP_dist_sch_static))
2669       Modifier = OMP_sch_modifier_nonmonotonic;
2670   }
2671   return Schedule | Modifier;
2672 }
2673 
2674 void CGOpenMPRuntime::emitForDispatchInit(
2675     CodeGenFunction &CGF, SourceLocation Loc,
2676     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2677     bool Ordered, const DispatchRTInput &DispatchValues) {
2678   if (!CGF.HaveInsertPoint())
2679     return;
2680   OpenMPSchedType Schedule = getRuntimeSchedule(
2681       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2682   assert(Ordered ||
2683          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2684           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2685           Schedule != OMP_sch_static_balanced_chunked));
2686   // Call __kmpc_dispatch_init(
2687   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2688   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2689   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2690 
2691   // If the Chunk was not specified in the clause - use default value 1.
2692   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2693                                             : CGF.Builder.getIntN(IVSize, 1);
2694   llvm::Value *Args[] = {
2695       emitUpdateLocation(CGF, Loc),
2696       getThreadID(CGF, Loc),
2697       CGF.Builder.getInt32(addMonoNonMonoModifier(
2698           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2699       DispatchValues.LB,                                     // Lower
2700       DispatchValues.UB,                                     // Upper
2701       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2702       Chunk                                                  // Chunk
2703   };
2704   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2705 }
2706 
2707 static void emitForStaticInitCall(
2708     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2709     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2710     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2711     const CGOpenMPRuntime::StaticRTInput &Values) {
2712   if (!CGF.HaveInsertPoint())
2713     return;
2714 
2715   assert(!Values.Ordered);
2716   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2717          Schedule == OMP_sch_static_balanced_chunked ||
2718          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2719          Schedule == OMP_dist_sch_static ||
2720          Schedule == OMP_dist_sch_static_chunked);
2721 
2722   // Call __kmpc_for_static_init(
2723   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2724   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2725   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2726   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2727   llvm::Value *Chunk = Values.Chunk;
2728   if (Chunk == nullptr) {
2729     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2730             Schedule == OMP_dist_sch_static) &&
2731            "expected static non-chunked schedule");
2732     // If the Chunk was not specified in the clause - use default value 1.
2733     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2734   } else {
2735     assert((Schedule == OMP_sch_static_chunked ||
2736             Schedule == OMP_sch_static_balanced_chunked ||
2737             Schedule == OMP_ord_static_chunked ||
2738             Schedule == OMP_dist_sch_static_chunked) &&
2739            "expected static chunked schedule");
2740   }
2741   llvm::Value *Args[] = {
2742       UpdateLocation,
2743       ThreadId,
2744       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2745                                                   M2)), // Schedule type
2746       Values.IL.getPointer(),                           // &isLastIter
2747       Values.LB.getPointer(),                           // &LB
2748       Values.UB.getPointer(),                           // &UB
2749       Values.ST.getPointer(),                           // &Stride
2750       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2751       Chunk                                             // Chunk
2752   };
2753   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2754 }
2755 
2756 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2757                                         SourceLocation Loc,
2758                                         OpenMPDirectiveKind DKind,
2759                                         const OpenMPScheduleTy &ScheduleKind,
2760                                         const StaticRTInput &Values) {
2761   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2762       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2763   assert(isOpenMPWorksharingDirective(DKind) &&
2764          "Expected loop-based or sections-based directive.");
2765   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2766                                              isOpenMPLoopDirective(DKind)
2767                                                  ? OMP_IDENT_WORK_LOOP
2768                                                  : OMP_IDENT_WORK_SECTIONS);
2769   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2770   llvm::FunctionCallee StaticInitFunction =
2771       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2772   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2773   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2774                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2775 }
2776 
2777 void CGOpenMPRuntime::emitDistributeStaticInit(
2778     CodeGenFunction &CGF, SourceLocation Loc,
2779     OpenMPDistScheduleClauseKind SchedKind,
2780     const CGOpenMPRuntime::StaticRTInput &Values) {
2781   OpenMPSchedType ScheduleNum =
2782       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2783   llvm::Value *UpdatedLocation =
2784       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2785   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2786   llvm::FunctionCallee StaticInitFunction =
2787       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2788   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2789                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2790                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2791 }
2792 
2793 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2794                                           SourceLocation Loc,
2795                                           OpenMPDirectiveKind DKind) {
2796   if (!CGF.HaveInsertPoint())
2797     return;
2798   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2799   llvm::Value *Args[] = {
2800       emitUpdateLocation(CGF, Loc,
2801                          isOpenMPDistributeDirective(DKind)
2802                              ? OMP_IDENT_WORK_DISTRIBUTE
2803                              : isOpenMPLoopDirective(DKind)
2804                                    ? OMP_IDENT_WORK_LOOP
2805                                    : OMP_IDENT_WORK_SECTIONS),
2806       getThreadID(CGF, Loc)};
2807   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2808   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2809                           CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2810                       Args);
2811 }
2812 
2813 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2814                                                  SourceLocation Loc,
2815                                                  unsigned IVSize,
2816                                                  bool IVSigned) {
2817   if (!CGF.HaveInsertPoint())
2818     return;
2819   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2820   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2821   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2822 }
2823 
2824 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2825                                           SourceLocation Loc, unsigned IVSize,
2826                                           bool IVSigned, Address IL,
2827                                           Address LB, Address UB,
2828                                           Address ST) {
2829   // Call __kmpc_dispatch_next(
2830   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2831   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2832   //          kmp_int[32|64] *p_stride);
2833   llvm::Value *Args[] = {
2834       emitUpdateLocation(CGF, Loc),
2835       getThreadID(CGF, Loc),
2836       IL.getPointer(), // &isLastIter
2837       LB.getPointer(), // &Lower
2838       UB.getPointer(), // &Upper
2839       ST.getPointer()  // &Stride
2840   };
2841   llvm::Value *Call =
2842       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2843   return CGF.EmitScalarConversion(
2844       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2845       CGF.getContext().BoolTy, Loc);
2846 }
2847 
2848 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2849                                            llvm::Value *NumThreads,
2850                                            SourceLocation Loc) {
2851   if (!CGF.HaveInsertPoint())
2852     return;
2853   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2854   llvm::Value *Args[] = {
2855       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2856       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2857   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2858                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2859                       Args);
2860 }
2861 
2862 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2863                                          ProcBindKind ProcBind,
2864                                          SourceLocation Loc) {
2865   if (!CGF.HaveInsertPoint())
2866     return;
2867   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2868   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2869   llvm::Value *Args[] = {
2870       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2871       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2872   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2873                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2874                       Args);
2875 }
2876 
2877 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2878                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2879   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2880     OMPBuilder.createFlush(CGF.Builder);
2881   } else {
2882     if (!CGF.HaveInsertPoint())
2883       return;
2884     // Build call void __kmpc_flush(ident_t *loc)
2885     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2886                             CGM.getModule(), OMPRTL___kmpc_flush),
2887                         emitUpdateLocation(CGF, Loc));
2888   }
2889 }
2890 
namespace {
/// Field indexes of the kmp_task_t record, in declaration order. The
/// enumerator values are implicit (starting at 0), so the order here must not
/// change.
enum KmpTaskTFields {
  /// Shared variables list.
  KmpTaskTShareds,
  /// Routine executed by the task.
  KmpTaskTRoutine,
  /// Partition id, used for untied tasks.
  KmpTaskTPartId,
  /// Function that calls the destructors of private variables.
  Data1,
  /// Priority of the task.
  Data2,
  /// Lower bound (taskloops only).
  KmpTaskTLowerBound,
  /// Upper bound (taskloops only).
  KmpTaskTUpperBound,
  /// Stride (taskloops only).
  KmpTaskTStride,
  /// Is-last-iteration flag (taskloops only).
  KmpTaskTLastIter,
  /// Reduction data (taskloops only).
  KmpTaskTReductions,
};
} // anonymous namespace
2916 
2917 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2918   return OffloadEntriesTargetRegion.empty() &&
2919          OffloadEntriesDeviceGlobalVar.empty();
2920 }
2921 
2922 /// Initialize target region entry.
2923 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2924     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2925                                     StringRef ParentName, unsigned LineNum,
2926                                     unsigned Order) {
2927   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2928                                              "only required for the device "
2929                                              "code generation.");
2930   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2931       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2932                                    OMPTargetRegionEntryTargetRegion);
2933   ++OffloadingEntriesNum;
2934 }
2935 
2936 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2937     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2938                                   StringRef ParentName, unsigned LineNum,
2939                                   llvm::Constant *Addr, llvm::Constant *ID,
2940                                   OMPTargetRegionEntryKind Flags) {
2941   // If we are emitting code for a target, the entry is already initialized,
2942   // only has to be registered.
2943   if (CGM.getLangOpts().OpenMPIsDevice) {
2944     // This could happen if the device compilation is invoked standalone.
2945     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
2946       initializeTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
2947                                       OffloadingEntriesNum);
2948     auto &Entry =
2949         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
2950     Entry.setAddress(Addr);
2951     Entry.setID(ID);
2952     Entry.setFlags(Flags);
2953   } else {
2954     if (Flags ==
2955             OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
2956         hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
2957                                  /*IgnoreAddressId*/ true))
2958       return;
2959     assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
2960            "Target region entry already registered!");
2961     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
2962     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
2963     ++OffloadingEntriesNum;
2964   }
2965 }
2966 
2967 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
2968     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
2969     bool IgnoreAddressId) const {
2970   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
2971   if (PerDevice == OffloadEntriesTargetRegion.end())
2972     return false;
2973   auto PerFile = PerDevice->second.find(FileID);
2974   if (PerFile == PerDevice->second.end())
2975     return false;
2976   auto PerParentName = PerFile->second.find(ParentName);
2977   if (PerParentName == PerFile->second.end())
2978     return false;
2979   auto PerLine = PerParentName->second.find(LineNum);
2980   if (PerLine == PerParentName->second.end())
2981     return false;
2982   // Fail if this entry is already registered.
2983   if (!IgnoreAddressId &&
2984       (PerLine->second.getAddress() || PerLine->second.getID()))
2985     return false;
2986   return true;
2987 }
2988 
2989 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
2990     const OffloadTargetRegionEntryInfoActTy &Action) {
2991   // Scan all target region entries and perform the provided action.
2992   for (const auto &D : OffloadEntriesTargetRegion)
2993     for (const auto &F : D.second)
2994       for (const auto &P : F.second)
2995         for (const auto &L : P.second)
2996           Action(D.first, F.first, P.first(), L.first, L.second);
2997 }
2998 
2999 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3000     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3001                                        OMPTargetGlobalVarEntryKind Flags,
3002                                        unsigned Order) {
3003   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3004                                              "only required for the device "
3005                                              "code generation.");
3006   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3007   ++OffloadingEntriesNum;
3008 }
3009 
3010 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3011     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3012                                      CharUnits VarSize,
3013                                      OMPTargetGlobalVarEntryKind Flags,
3014                                      llvm::GlobalValue::LinkageTypes Linkage) {
3015   if (CGM.getLangOpts().OpenMPIsDevice) {
3016     // This could happen if the device compilation is invoked standalone.
3017     if (!hasDeviceGlobalVarEntryInfo(VarName))
3018       initializeDeviceGlobalVarEntryInfo(VarName, Flags, OffloadingEntriesNum);
3019     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3020     assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3021            "Resetting with the new address.");
3022     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3023       if (Entry.getVarSize().isZero()) {
3024         Entry.setVarSize(VarSize);
3025         Entry.setLinkage(Linkage);
3026       }
3027       return;
3028     }
3029     Entry.setVarSize(VarSize);
3030     Entry.setLinkage(Linkage);
3031     Entry.setAddress(Addr);
3032   } else {
3033     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3034       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3035       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3036              "Entry not initialized!");
3037       assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3038              "Resetting with the new address.");
3039       if (Entry.getVarSize().isZero()) {
3040         Entry.setVarSize(VarSize);
3041         Entry.setLinkage(Linkage);
3042       }
3043       return;
3044     }
3045     OffloadEntriesDeviceGlobalVar.try_emplace(
3046         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3047     ++OffloadingEntriesNum;
3048   }
3049 }
3050 
3051 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3052     actOnDeviceGlobalVarEntriesInfo(
3053         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3054   // Scan all target region entries and perform the provided action.
3055   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3056     Action(E.getKey(), E.getValue());
3057 }
3058 
3059 void CGOpenMPRuntime::createOffloadEntry(
3060     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3061     llvm::GlobalValue::LinkageTypes Linkage) {
3062   StringRef Name = Addr->getName();
3063   llvm::Module &M = CGM.getModule();
3064   llvm::LLVMContext &C = M.getContext();
3065 
3066   // Create constant string with the name.
3067   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3068 
3069   std::string StringName = getName({"omp_offloading", "entry_name"});
3070   auto *Str = new llvm::GlobalVariable(
3071       M, StrPtrInit->getType(), /*isConstant=*/true,
3072       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3073   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3074 
3075   llvm::Constant *Data[] = {
3076       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3077       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3078       llvm::ConstantInt::get(CGM.SizeTy, Size),
3079       llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3080       llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3081   std::string EntryName = getName({"omp_offloading", "entry", ""});
3082   llvm::GlobalVariable *Entry = createGlobalStruct(
3083       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3084       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3085 
3086   // The entry has to be created in the section the linker expects it to be.
3087   Entry->setSection("omp_offloading_entries");
3088 }
3089 
3090 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3091   // Emit the offloading entries and metadata so that the device codegen side
3092   // can easily figure out what to emit. The produced metadata looks like
3093   // this:
3094   //
3095   // !omp_offload.info = !{!1, ...}
3096   //
3097   // Right now we only generate metadata for function that contain target
3098   // regions.
3099 
3100   // If we are in simd mode or there are no entries, we don't need to do
3101   // anything.
3102   if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
3103     return;
3104 
3105   llvm::Module &M = CGM.getModule();
3106   llvm::LLVMContext &C = M.getContext();
3107   SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
3108                          SourceLocation, StringRef>,
3109               16>
3110       OrderedEntries(OffloadEntriesInfoManager.size());
3111   llvm::SmallVector<StringRef, 16> ParentFunctions(
3112       OffloadEntriesInfoManager.size());
3113 
3114   // Auxiliary methods to create metadata values and strings.
3115   auto &&GetMDInt = [this](unsigned V) {
3116     return llvm::ConstantAsMetadata::get(
3117         llvm::ConstantInt::get(CGM.Int32Ty, V));
3118   };
3119 
3120   auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3121 
3122   // Create the offloading info metadata node.
3123   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3124 
3125   // Create function that emits metadata for each target region entry;
3126   auto &&TargetRegionMetadataEmitter =
3127       [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
3128        &GetMDString](
3129           unsigned DeviceID, unsigned FileID, StringRef ParentName,
3130           unsigned Line,
3131           const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
3132         // Generate metadata for target regions. Each entry of this metadata
3133         // contains:
3134         // - Entry 0 -> Kind of this type of metadata (0).
3135         // - Entry 1 -> Device ID of the file where the entry was identified.
3136         // - Entry 2 -> File ID of the file where the entry was identified.
3137         // - Entry 3 -> Mangled name of the function where the entry was
3138         // identified.
3139         // - Entry 4 -> Line in the file where the entry was identified.
3140         // - Entry 5 -> Order the entry was created.
3141         // The first element of the metadata node is the kind.
3142         llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
3143                                  GetMDInt(FileID),      GetMDString(ParentName),
3144                                  GetMDInt(Line),        GetMDInt(E.getOrder())};
3145 
3146         SourceLocation Loc;
3147         for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
3148                   E = CGM.getContext().getSourceManager().fileinfo_end();
3149              I != E; ++I) {
3150           if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
3151               I->getFirst()->getUniqueID().getFile() == FileID) {
3152             Loc = CGM.getContext().getSourceManager().translateFileLineCol(
3153                 I->getFirst(), Line, 1);
3154             break;
3155           }
3156         }
3157         // Save this entry in the right position of the ordered entries array.
3158         OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
3159         ParentFunctions[E.getOrder()] = ParentName;
3160 
3161         // Add metadata to the named metadata node.
3162         MD->addOperand(llvm::MDNode::get(C, Ops));
3163       };
3164 
3165   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3166       TargetRegionMetadataEmitter);
3167 
3168   // Create function that emits metadata for each device global variable entry;
3169   auto &&DeviceGlobalVarMetadataEmitter =
3170       [&C, &OrderedEntries, &GetMDInt, &GetMDString,
3171        MD](StringRef MangledName,
3172            const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
3173                &E) {
3174         // Generate metadata for global variables. Each entry of this metadata
3175         // contains:
3176         // - Entry 0 -> Kind of this type of metadata (1).
3177         // - Entry 1 -> Mangled name of the variable.
3178         // - Entry 2 -> Declare target kind.
3179         // - Entry 3 -> Order the entry was created.
3180         // The first element of the metadata node is the kind.
3181         llvm::Metadata *Ops[] = {
3182             GetMDInt(E.getKind()), GetMDString(MangledName),
3183             GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3184 
3185         // Save this entry in the right position of the ordered entries array.
3186         OrderedEntries[E.getOrder()] =
3187             std::make_tuple(&E, SourceLocation(), MangledName);
3188 
3189         // Add metadata to the named metadata node.
3190         MD->addOperand(llvm::MDNode::get(C, Ops));
3191       };
3192 
3193   OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
3194       DeviceGlobalVarMetadataEmitter);
3195 
3196   for (const auto &E : OrderedEntries) {
3197     assert(std::get<0>(E) && "All ordered entries must exist!");
3198     if (const auto *CE =
3199             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3200                 std::get<0>(E))) {
3201       if (!CE->getID() || !CE->getAddress()) {
3202         // Do not blame the entry if the parent funtion is not emitted.
3203         StringRef FnName = ParentFunctions[CE->getOrder()];
3204         if (!CGM.GetGlobalValue(FnName))
3205           continue;
3206         unsigned DiagID = CGM.getDiags().getCustomDiagID(
3207             DiagnosticsEngine::Error,
3208             "Offloading entry for target region in %0 is incorrect: either the "
3209             "address or the ID is invalid.");
3210         CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
3211         continue;
3212       }
3213       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3214                          CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3215     } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
3216                                              OffloadEntryInfoDeviceGlobalVar>(
3217                    std::get<0>(E))) {
3218       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
3219           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3220               CE->getFlags());
3221       switch (Flags) {
3222       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
3223         if (CGM.getLangOpts().OpenMPIsDevice &&
3224             CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
3225           continue;
3226         if (!CE->getAddress()) {
3227           unsigned DiagID = CGM.getDiags().getCustomDiagID(
3228               DiagnosticsEngine::Error, "Offloading entry for declare target "
3229                                         "variable %0 is incorrect: the "
3230                                         "address is invalid.");
3231           CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
3232           continue;
3233         }
3234         // The vaiable has no definition - no need to add the entry.
3235         if (CE->getVarSize().isZero())
3236           continue;
3237         break;
3238       }
3239       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
3240         assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
3241                 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
3242                "Declaret target link address is set.");
3243         if (CGM.getLangOpts().OpenMPIsDevice)
3244           continue;
3245         if (!CE->getAddress()) {
3246           unsigned DiagID = CGM.getDiags().getCustomDiagID(
3247               DiagnosticsEngine::Error,
3248               "Offloading entry for declare target variable is incorrect: the "
3249               "address is invalid.");
3250           CGM.getDiags().Report(DiagID);
3251           continue;
3252         }
3253         break;
3254       }
3255       createOffloadEntry(CE->getAddress(), CE->getAddress(),
3256                          CE->getVarSize().getQuantity(), Flags,
3257                          CE->getLinkage());
3258     } else {
3259       llvm_unreachable("Unsupported entry kind.");
3260     }
3261   }
3262 }
3263 
3264 /// Loads all the offload entries information from the host IR
3265 /// metadata.
3266 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3267   // If we are in target mode, load the metadata from the host IR. This code has
3268   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
3269 
3270   if (!CGM.getLangOpts().OpenMPIsDevice)
3271     return;
3272 
3273   if (CGM.getLangOpts().OMPHostIRFile.empty())
3274     return;
3275 
3276   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3277   if (auto EC = Buf.getError()) {
3278     CGM.getDiags().Report(diag::err_cannot_open_file)
3279         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3280     return;
3281   }
3282 
3283   llvm::LLVMContext C;
3284   auto ME = expectedToErrorOrAndEmitErrors(
3285       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3286 
3287   if (auto EC = ME.getError()) {
3288     unsigned DiagID = CGM.getDiags().getCustomDiagID(
3289         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3290     CGM.getDiags().Report(DiagID)
3291         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3292     return;
3293   }
3294 
3295   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3296   if (!MD)
3297     return;
3298 
3299   for (llvm::MDNode *MN : MD->operands()) {
3300     auto &&GetMDInt = [MN](unsigned Idx) {
3301       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3302       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3303     };
3304 
3305     auto &&GetMDString = [MN](unsigned Idx) {
3306       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3307       return V->getString();
3308     };
3309 
3310     switch (GetMDInt(0)) {
3311     default:
3312       llvm_unreachable("Unexpected metadata!");
3313       break;
3314     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3315         OffloadingEntryInfoTargetRegion:
3316       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3317           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3318           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3319           /*Order=*/GetMDInt(5));
3320       break;
3321     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3322         OffloadingEntryInfoDeviceGlobalVar:
3323       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3324           /*MangledName=*/GetMDString(1),
3325           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3326               /*Flags=*/GetMDInt(2)),
3327           /*Order=*/GetMDInt(3));
3328       break;
3329     }
3330   }
3331 }
3332 
3333 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3334   if (!KmpRoutineEntryPtrTy) {
3335     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3336     ASTContext &C = CGM.getContext();
3337     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3338     FunctionProtoType::ExtProtoInfo EPI;
3339     KmpRoutineEntryPtrQTy = C.getPointerType(
3340         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3341     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3342   }
3343 }
3344 
3345 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3346   // Make sure the type of the entry is already created. This is the type we
3347   // have to create:
3348   // struct __tgt_offload_entry{
3349   //   void      *addr;       // Pointer to the offload entry info.
3350   //                          // (function or global)
3351   //   char      *name;       // Name of the function or global.
3352   //   size_t     size;       // Size of the entry info (0 if it a function).
3353   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3354   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3355   // };
3356   if (TgtOffloadEntryQTy.isNull()) {
3357     ASTContext &C = CGM.getContext();
3358     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3359     RD->startDefinition();
3360     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3361     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3362     addFieldToRecordDecl(C, RD, C.getSizeType());
3363     addFieldToRecordDecl(
3364         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3365     addFieldToRecordDecl(
3366         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3367     RD->completeDefinition();
3368     RD->addAttr(PackedAttr::CreateImplicit(C));
3369     TgtOffloadEntryQTy = C.getRecordType(RD);
3370   }
3371   return TgtOffloadEntryQTy;
3372 }
3373 
3374 namespace {
3375 struct PrivateHelpersTy {
3376   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3377                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3378       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3379         PrivateElemInit(PrivateElemInit) {}
3380   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3381   const Expr *OriginalRef = nullptr;
3382   const VarDecl *Original = nullptr;
3383   const VarDecl *PrivateCopy = nullptr;
3384   const VarDecl *PrivateElemInit = nullptr;
3385   bool isLocalPrivate() const {
3386     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3387   }
3388 };
3389 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3390 } // anonymous namespace
3391 
3392 static bool isAllocatableDecl(const VarDecl *VD) {
3393   const VarDecl *CVD = VD->getCanonicalDecl();
3394   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3395     return false;
3396   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3397   // Use the default allocation.
3398   return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
3399             AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
3400            !AA->getAllocator());
3401 }
3402 
3403 static RecordDecl *
3404 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3405   if (!Privates.empty()) {
3406     ASTContext &C = CGM.getContext();
3407     // Build struct .kmp_privates_t. {
3408     //         /*  private vars  */
3409     //       };
3410     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3411     RD->startDefinition();
3412     for (const auto &Pair : Privates) {
3413       const VarDecl *VD = Pair.second.Original;
3414       QualType Type = VD->getType().getNonReferenceType();
3415       // If the private variable is a local variable with lvalue ref type,
3416       // allocate the pointer instead of the pointee type.
3417       if (Pair.second.isLocalPrivate()) {
3418         if (VD->getType()->isLValueReferenceType())
3419           Type = C.getPointerType(Type);
3420         if (isAllocatableDecl(VD))
3421           Type = C.getPointerType(Type);
3422       }
3423       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3424       if (VD->hasAttrs()) {
3425         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3426              E(VD->getAttrs().end());
3427              I != E; ++I)
3428           FD->addAttr(*I);
3429       }
3430     }
3431     RD->completeDefinition();
3432     return RD;
3433   }
3434   return nullptr;
3435 }
3436 
3437 static RecordDecl *
3438 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3439                          QualType KmpInt32Ty,
3440                          QualType KmpRoutineEntryPointerQTy) {
3441   ASTContext &C = CGM.getContext();
3442   // Build struct kmp_task_t {
3443   //         void *              shareds;
3444   //         kmp_routine_entry_t routine;
3445   //         kmp_int32           part_id;
3446   //         kmp_cmplrdata_t data1;
3447   //         kmp_cmplrdata_t data2;
3448   // For taskloops additional fields:
3449   //         kmp_uint64          lb;
3450   //         kmp_uint64          ub;
3451   //         kmp_int64           st;
3452   //         kmp_int32           liter;
3453   //         void *              reductions;
3454   //       };
3455   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3456   UD->startDefinition();
3457   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3458   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3459   UD->completeDefinition();
3460   QualType KmpCmplrdataTy = C.getRecordType(UD);
3461   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3462   RD->startDefinition();
3463   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3464   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3465   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3466   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3467   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3468   if (isOpenMPTaskLoopDirective(Kind)) {
3469     QualType KmpUInt64Ty =
3470         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3471     QualType KmpInt64Ty =
3472         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3473     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3474     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3475     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3476     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3477     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3478   }
3479   RD->completeDefinition();
3480   return RD;
3481 }
3482 
3483 static RecordDecl *
3484 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3485                                      ArrayRef<PrivateDataTy> Privates) {
3486   ASTContext &C = CGM.getContext();
3487   // Build struct kmp_task_t_with_privates {
3488   //         kmp_task_t task_data;
3489   //         .kmp_privates_t. privates;
3490   //       };
3491   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3492   RD->startDefinition();
3493   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3494   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3495     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3496   RD->completeDefinition();
3497   return RD;
3498 }
3499 
3500 /// Emit a proxy function which accepts kmp_task_t as the second
3501 /// argument.
3502 /// \code
3503 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3504 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3505 ///   For taskloops:
3506 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3507 ///   tt->reductions, tt->shareds);
3508 ///   return 0;
3509 /// }
3510 /// \endcode
3511 static llvm::Function *
3512 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3513                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3514                       QualType KmpTaskTWithPrivatesPtrQTy,
3515                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3516                       QualType SharedsPtrTy, llvm::Function *TaskFunction,
3517                       llvm::Value *TaskPrivatesMap) {
3518   ASTContext &C = CGM.getContext();
3519   FunctionArgList Args;
3520   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3521                             ImplicitParamDecl::Other);
3522   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3523                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3524                                 ImplicitParamDecl::Other);
3525   Args.push_back(&GtidArg);
3526   Args.push_back(&TaskTypeArg);
3527   const auto &TaskEntryFnInfo =
3528       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3529   llvm::FunctionType *TaskEntryTy =
3530       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3531   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3532   auto *TaskEntry = llvm::Function::Create(
3533       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3534   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3535   TaskEntry->setDoesNotRecurse();
3536   CodeGenFunction CGF(CGM);
3537   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3538                     Loc, Loc);
3539 
3540   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3541   // tt,
3542   // For taskloops:
3543   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3544   // tt->task_data.shareds);
3545   llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3546       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3547   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3548       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3549       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3550   const auto *KmpTaskTWithPrivatesQTyRD =
3551       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3552   LValue Base =
3553       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3554   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3555   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3556   LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3557   llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3558 
3559   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3560   LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3561   llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3562       CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3563       CGF.ConvertTypeForMem(SharedsPtrTy));
3564 
3565   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3566   llvm::Value *PrivatesParam;
3567   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3568     LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3569     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3570         PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3571   } else {
3572     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3573   }
3574 
3575   llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
3576                                TaskPrivatesMap,
3577                                CGF.Builder
3578                                    .CreatePointerBitCastOrAddrSpaceCast(
3579                                        TDBase.getAddress(CGF), CGF.VoidPtrTy)
3580                                    .getPointer()};
3581   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3582                                           std::end(CommonArgs));
3583   if (isOpenMPTaskLoopDirective(Kind)) {
3584     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3585     LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3586     llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3587     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3588     LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3589     llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3590     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3591     LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3592     llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3593     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3594     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3595     llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3596     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3597     LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3598     llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3599     CallArgs.push_back(LBParam);
3600     CallArgs.push_back(UBParam);
3601     CallArgs.push_back(StParam);
3602     CallArgs.push_back(LIParam);
3603     CallArgs.push_back(RParam);
3604   }
3605   CallArgs.push_back(SharedsParam);
3606 
3607   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3608                                                   CallArgs);
3609   CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3610                              CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3611   CGF.FinishFunction();
3612   return TaskEntry;
3613 }
3614 
3615 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3616                                             SourceLocation Loc,
3617                                             QualType KmpInt32Ty,
3618                                             QualType KmpTaskTWithPrivatesPtrQTy,
3619                                             QualType KmpTaskTWithPrivatesQTy) {
3620   ASTContext &C = CGM.getContext();
3621   FunctionArgList Args;
3622   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3623                             ImplicitParamDecl::Other);
3624   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3625                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3626                                 ImplicitParamDecl::Other);
3627   Args.push_back(&GtidArg);
3628   Args.push_back(&TaskTypeArg);
3629   const auto &DestructorFnInfo =
3630       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3631   llvm::FunctionType *DestructorFnTy =
3632       CGM.getTypes().GetFunctionType(DestructorFnInfo);
3633   std::string Name =
3634       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3635   auto *DestructorFn =
3636       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3637                              Name, &CGM.getModule());
3638   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3639                                     DestructorFnInfo);
3640   DestructorFn->setDoesNotRecurse();
3641   CodeGenFunction CGF(CGM);
3642   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3643                     Args, Loc, Loc);
3644 
3645   LValue Base = CGF.EmitLoadOfPointerLValue(
3646       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3647       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3648   const auto *KmpTaskTWithPrivatesQTyRD =
3649       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3650   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3651   Base = CGF.EmitLValueForField(Base, *FI);
3652   for (const auto *Field :
3653        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3654     if (QualType::DestructionKind DtorKind =
3655             Field->getType().isDestructedType()) {
3656       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3657       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3658     }
3659   }
3660   CGF.FinishFunction();
3661   return DestructorFn;
3662 }
3663 
3664 /// Emit a privates mapping function for correct handling of private and
3665 /// firstprivate variables.
3666 /// \code
3667 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3668 /// **noalias priv1,...,  <tyn> **noalias privn) {
3669 ///   *priv1 = &.privates.priv1;
3670 ///   ...;
3671 ///   *privn = &.privates.privn;
3672 /// }
3673 /// \endcode
3674 static llvm::Value *
3675 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3676                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3677                                ArrayRef<PrivateDataTy> Privates) {
3678   ASTContext &C = CGM.getContext();
3679   FunctionArgList Args;
3680   ImplicitParamDecl TaskPrivatesArg(
3681       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3682       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3683       ImplicitParamDecl::Other);
3684   Args.push_back(&TaskPrivatesArg);
3685   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3686   unsigned Counter = 1;
3687   for (const Expr *E : Data.PrivateVars) {
3688     Args.push_back(ImplicitParamDecl::Create(
3689         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3690         C.getPointerType(C.getPointerType(E->getType()))
3691             .withConst()
3692             .withRestrict(),
3693         ImplicitParamDecl::Other));
3694     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3695     PrivateVarsPos[VD] = Counter;
3696     ++Counter;
3697   }
3698   for (const Expr *E : Data.FirstprivateVars) {
3699     Args.push_back(ImplicitParamDecl::Create(
3700         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3701         C.getPointerType(C.getPointerType(E->getType()))
3702             .withConst()
3703             .withRestrict(),
3704         ImplicitParamDecl::Other));
3705     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3706     PrivateVarsPos[VD] = Counter;
3707     ++Counter;
3708   }
3709   for (const Expr *E : Data.LastprivateVars) {
3710     Args.push_back(ImplicitParamDecl::Create(
3711         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3712         C.getPointerType(C.getPointerType(E->getType()))
3713             .withConst()
3714             .withRestrict(),
3715         ImplicitParamDecl::Other));
3716     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3717     PrivateVarsPos[VD] = Counter;
3718     ++Counter;
3719   }
3720   for (const VarDecl *VD : Data.PrivateLocals) {
3721     QualType Ty = VD->getType().getNonReferenceType();
3722     if (VD->getType()->isLValueReferenceType())
3723       Ty = C.getPointerType(Ty);
3724     if (isAllocatableDecl(VD))
3725       Ty = C.getPointerType(Ty);
3726     Args.push_back(ImplicitParamDecl::Create(
3727         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3728         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3729         ImplicitParamDecl::Other));
3730     PrivateVarsPos[VD] = Counter;
3731     ++Counter;
3732   }
3733   const auto &TaskPrivatesMapFnInfo =
3734       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3735   llvm::FunctionType *TaskPrivatesMapTy =
3736       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3737   std::string Name =
3738       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3739   auto *TaskPrivatesMap = llvm::Function::Create(
3740       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3741       &CGM.getModule());
3742   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3743                                     TaskPrivatesMapFnInfo);
3744   if (CGM.getLangOpts().Optimize) {
3745     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3746     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3747     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3748   }
3749   CodeGenFunction CGF(CGM);
3750   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3751                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3752 
3753   // *privi = &.privates.privi;
3754   LValue Base = CGF.EmitLoadOfPointerLValue(
3755       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3756       TaskPrivatesArg.getType()->castAs<PointerType>());
3757   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3758   Counter = 0;
3759   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3760     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3761     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3762     LValue RefLVal =
3763         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3764     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3765         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3766     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3767     ++Counter;
3768   }
3769   CGF.FinishFunction();
3770   return TaskPrivatesMap;
3771 }
3772 
/// Emit initialization for private variables in task-based directives.
///
/// Walks \p Privates in lockstep with the fields of the privates record (the
/// second field of \p KmpTaskTWithPrivatesQTyRD) and emits the initializer of
/// each private copy into its field.
///
/// \param CGF Function the initialization code is emitted into.
/// \param D Directive whose 'task' or 'taskloop' captured statement is used to
/// look up captured fields.
/// \param KmpTaskSharedsPtr Address of the shareds block used as the source of
/// firstprivate values; only used when the source base is built (see below).
/// \param TDBase LValue of the kmp_task_t_with_privates object whose privates
/// are initialized.
/// \param KmpTaskTWithPrivatesQTyRD Record of the object behind \p TDBase.
/// \param SharedsTy Type of the shareds block.
/// \param SharedsPtrTy Pointer type \p KmpTaskSharedsPtr is cast to.
/// \param Data Task data; only FirstprivateVars is consulted here.
/// \param Privates Helper data, one entry per field of the privates record.
/// \param ForDup True when called while emitting the task duplication
/// function; in that mode only non-trivial constructor initializers are
/// emitted and firstprivates are read from the shareds block.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // Second field of kmp_task_t_with_privates: the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  // Any taskloop-based directive uses the 'taskloop' captured statement, all
  // other directives use the 'task' one.
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  // Base lvalue of the shareds block; left default-constructed unless one of
  // the conditions below holds.
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // From here on FI iterates over the fields of the privates record itself and
  // is advanced once per entry of Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // Outside the dup function every initializer is emitted; inside it only
    // non-trivial constructor calls are.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // A non-null PrivateElemInit means the private copy is initialized from
      // the original variable; the lvalue of that original is computed first.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          // Not captured: take the address of the original variable directly.
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // In the dup function the original is read from the shareds block;
          // the lvalue is rebuilt with the declared alignment of the original
          // variable.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          // The original is a lambda capture, or the current code is a block:
          // emit the reference expression as is.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  // Remap the helper variable to the current source element.
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array: remap the helper variable to the original's address and
          // emit the initializer into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // No helper variable: the initializer does not depend on the original.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3894 
3895 /// Check if duplication function is required for taskloops.
3896 static bool checkInitIsRequired(CodeGenFunction &CGF,
3897                                 ArrayRef<PrivateDataTy> Privates) {
3898   bool InitRequired = false;
3899   for (const PrivateDataTy &Pair : Privates) {
3900     if (Pair.second.isLocalPrivate())
3901       continue;
3902     const VarDecl *VD = Pair.second.PrivateCopy;
3903     const Expr *Init = VD->getAnyInitializer();
3904     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3905                                     !CGF.isTrivialInitializer(Init));
3906     if (InitRequired)
3907       break;
3908   }
3909   return InitRequired;
3910 }
3911 
3912 
3913 /// Emit task_dup function (for initialization of
3914 /// private/firstprivate/lastprivate vars and last_iter flag)
3915 /// \code
3916 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3917 /// lastpriv) {
3918 /// // setup lastprivate flag
3919 ///    task_dst->last = lastpriv;
3920 /// // could be constructor calls here...
3921 /// }
3922 /// \endcode
3923 static llvm::Value *
3924 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3925                     const OMPExecutableDirective &D,
3926                     QualType KmpTaskTWithPrivatesPtrQTy,
3927                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3928                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3929                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3930                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3931   ASTContext &C = CGM.getContext();
3932   FunctionArgList Args;
3933   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3934                            KmpTaskTWithPrivatesPtrQTy,
3935                            ImplicitParamDecl::Other);
3936   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3937                            KmpTaskTWithPrivatesPtrQTy,
3938                            ImplicitParamDecl::Other);
3939   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3940                                 ImplicitParamDecl::Other);
3941   Args.push_back(&DstArg);
3942   Args.push_back(&SrcArg);
3943   Args.push_back(&LastprivArg);
3944   const auto &TaskDupFnInfo =
3945       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3946   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3947   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3948   auto *TaskDup = llvm::Function::Create(
3949       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3950   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3951   TaskDup->setDoesNotRecurse();
3952   CodeGenFunction CGF(CGM);
3953   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3954                     Loc);
3955 
3956   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3957       CGF.GetAddrOfLocalVar(&DstArg),
3958       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3959   // task_dst->liter = lastpriv;
3960   if (WithLastIter) {
3961     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3962     LValue Base = CGF.EmitLValueForField(
3963         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3964     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3965     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3966         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3967     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3968   }
3969 
3970   // Emit initial values for private copies (if any).
3971   assert(!Privates.empty());
3972   Address KmpTaskSharedsPtr = Address::invalid();
3973   if (!Data.FirstprivateVars.empty()) {
3974     LValue TDBase = CGF.EmitLoadOfPointerLValue(
3975         CGF.GetAddrOfLocalVar(&SrcArg),
3976         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3977     LValue Base = CGF.EmitLValueForField(
3978         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3979     KmpTaskSharedsPtr = Address(
3980         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3981                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
3982                                                   KmpTaskTShareds)),
3983                              Loc),
3984         CGM.getNaturalTypeAlignment(SharedsTy));
3985   }
3986   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3987                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3988   CGF.FinishFunction();
3989   return TaskDup;
3990 }
3991 
3992 /// Checks if destructor function is required to be generated.
3993 /// \return true if cleanups are required, false otherwise.
3994 static bool
3995 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3996                          ArrayRef<PrivateDataTy> Privates) {
3997   for (const PrivateDataTy &P : Privates) {
3998     if (P.second.isLocalPrivate())
3999       continue;
4000     QualType Ty = P.second.Original->getType().getNonReferenceType();
4001     if (Ty.isDestructedType())
4002       return true;
4003   }
4004   return false;
4005 }
4006 
4007 namespace {
4008 /// Loop generator for OpenMP iterator expression.
4009 class OMPIteratorGeneratorScope final
4010     : public CodeGenFunction::OMPPrivateScope {
4011   CodeGenFunction &CGF;
4012   const OMPIteratorExpr *E = nullptr;
4013   SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
4014   SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
4015   OMPIteratorGeneratorScope() = delete;
4016   OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
4017 
4018 public:
4019   OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
4020       : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
4021     if (!E)
4022       return;
4023     SmallVector<llvm::Value *, 4> Uppers;
4024     for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
4025       Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
4026       const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
4027       addPrivate(VD, [&CGF, VD]() {
4028         return CGF.CreateMemTemp(VD->getType(), VD->getName());
4029       });
4030       const OMPIteratorHelperData &HelperData = E->getHelper(I);
4031       addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
4032         return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
4033                                  "counter.addr");
4034       });
4035     }
4036     Privatize();
4037 
4038     for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
4039       const OMPIteratorHelperData &HelperData = E->getHelper(I);
4040       LValue CLVal =
4041           CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
4042                              HelperData.CounterVD->getType());
4043       // Counter = 0;
4044       CGF.EmitStoreOfScalar(
4045           llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
4046           CLVal);
4047       CodeGenFunction::JumpDest &ContDest =
4048           ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
4049       CodeGenFunction::JumpDest &ExitDest =
4050           ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
4051       // N = <number-of_iterations>;
4052       llvm::Value *N = Uppers[I];
4053       // cont:
4054       // if (Counter < N) goto body; else goto exit;
4055       CGF.EmitBlock(ContDest.getBlock());
4056       auto *CVal =
4057           CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
4058       llvm::Value *Cmp =
4059           HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
4060               ? CGF.Builder.CreateICmpSLT(CVal, N)
4061               : CGF.Builder.CreateICmpULT(CVal, N);
4062       llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
4063       CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
4064       // body:
4065       CGF.EmitBlock(BodyBB);
4066       // Iteri = Begini + Counter * Stepi;
4067       CGF.EmitIgnoredExpr(HelperData.Update);
4068     }
4069   }
4070   ~OMPIteratorGeneratorScope() {
4071     if (!E)
4072       return;
4073     for (unsigned I = E->numOfIterators(); I > 0; --I) {
4074       // Counter = Counter + 1;
4075       const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
4076       CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
4077       // goto cont;
4078       CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
4079       // exit:
4080       CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
4081     }
4082   }
4083 };
4084 } // namespace
4085 
4086 static std::pair<llvm::Value *, llvm::Value *>
4087 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4088   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4089   llvm::Value *Addr;
4090   if (OASE) {
4091     const Expr *Base = OASE->getBase();
4092     Addr = CGF.EmitScalarExpr(Base);
4093   } else {
4094     Addr = CGF.EmitLValue(E).getPointer(CGF);
4095   }
4096   llvm::Value *SizeVal;
4097   QualType Ty = E->getType();
4098   if (OASE) {
4099     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4100     for (const Expr *SE : OASE->getDimensions()) {
4101       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4102       Sz = CGF.EmitScalarConversion(
4103           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4104       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4105     }
4106   } else if (const auto *ASE =
4107                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4108     LValue UpAddrLVal =
4109         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4110     llvm::Value *UpAddr =
4111         CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
4112     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4113     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4114     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4115   } else {
4116     SizeVal = CGF.getTypeSize(Ty);
4117   }
4118   return std::make_pair(Addr, SizeVal);
4119 }
4120 
4121 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4122 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4123   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4124   if (KmpTaskAffinityInfoTy.isNull()) {
4125     RecordDecl *KmpAffinityInfoRD =
4126         C.buildImplicitRecord("kmp_task_affinity_info_t");
4127     KmpAffinityInfoRD->startDefinition();
4128     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4129     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4130     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4131     KmpAffinityInfoRD->completeDefinition();
4132     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4133   }
4134 }
4135 
4136 CGOpenMPRuntime::TaskResultTy
4137 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4138                               const OMPExecutableDirective &D,
4139                               llvm::Function *TaskFunction, QualType SharedsTy,
4140                               Address Shareds, const OMPTaskDataTy &Data) {
4141   ASTContext &C = CGM.getContext();
4142   llvm::SmallVector<PrivateDataTy, 4> Privates;
4143   // Aggregate privates and sort them by the alignment.
4144   const auto *I = Data.PrivateCopies.begin();
4145   for (const Expr *E : Data.PrivateVars) {
4146     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4147     Privates.emplace_back(
4148         C.getDeclAlign(VD),
4149         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4150                          /*PrivateElemInit=*/nullptr));
4151     ++I;
4152   }
4153   I = Data.FirstprivateCopies.begin();
4154   const auto *IElemInitRef = Data.FirstprivateInits.begin();
4155   for (const Expr *E : Data.FirstprivateVars) {
4156     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4157     Privates.emplace_back(
4158         C.getDeclAlign(VD),
4159         PrivateHelpersTy(
4160             E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4161             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
4162     ++I;
4163     ++IElemInitRef;
4164   }
4165   I = Data.LastprivateCopies.begin();
4166   for (const Expr *E : Data.LastprivateVars) {
4167     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4168     Privates.emplace_back(
4169         C.getDeclAlign(VD),
4170         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4171                          /*PrivateElemInit=*/nullptr));
4172     ++I;
4173   }
4174   for (const VarDecl *VD : Data.PrivateLocals) {
4175     if (isAllocatableDecl(VD))
4176       Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
4177     else
4178       Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
4179   }
4180   llvm::stable_sort(Privates,
4181                     [](const PrivateDataTy &L, const PrivateDataTy &R) {
4182                       return L.first > R.first;
4183                     });
4184   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4185   // Build type kmp_routine_entry_t (if not built yet).
4186   emitKmpRoutineEntryT(KmpInt32Ty);
4187   // Build type kmp_task_t (if not built yet).
4188   if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
4189     if (SavedKmpTaskloopTQTy.isNull()) {
4190       SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4191           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4192     }
4193     KmpTaskTQTy = SavedKmpTaskloopTQTy;
4194   } else {
4195     assert((D.getDirectiveKind() == OMPD_task ||
4196             isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
4197             isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
4198            "Expected taskloop, task or target directive");
4199     if (SavedKmpTaskTQTy.isNull()) {
4200       SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4201           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4202     }
4203     KmpTaskTQTy = SavedKmpTaskTQTy;
4204   }
4205   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4206   // Build particular struct kmp_task_t for the given task.
4207   const RecordDecl *KmpTaskTWithPrivatesQTyRD =
4208       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
4209   QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
4210   QualType KmpTaskTWithPrivatesPtrQTy =
4211       C.getPointerType(KmpTaskTWithPrivatesQTy);
4212   llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
4213   llvm::Type *KmpTaskTWithPrivatesPtrTy =
4214       KmpTaskTWithPrivatesTy->getPointerTo();
4215   llvm::Value *KmpTaskTWithPrivatesTySize =
4216       CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
4217   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
4218 
4219   // Emit initial values for private copies (if any).
4220   llvm::Value *TaskPrivatesMap = nullptr;
4221   llvm::Type *TaskPrivatesMapTy =
4222       std::next(TaskFunction->arg_begin(), 3)->getType();
4223   if (!Privates.empty()) {
4224     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4225     TaskPrivatesMap =
4226         emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
4227     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4228         TaskPrivatesMap, TaskPrivatesMapTy);
4229   } else {
4230     TaskPrivatesMap = llvm::ConstantPointerNull::get(
4231         cast<llvm::PointerType>(TaskPrivatesMapTy));
4232   }
4233   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
4234   // kmp_task_t *tt);
4235   llvm::Function *TaskEntry = emitProxyTaskFunction(
4236       CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4237       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
4238       TaskPrivatesMap);
4239 
4240   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
4241   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
4242   // kmp_routine_entry_t *task_entry);
4243   // Task flags. Format is taken from
4244   // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
4245   // description of kmp_tasking_flags struct.
4246   enum {
4247     TiedFlag = 0x1,
4248     FinalFlag = 0x2,
4249     DestructorsFlag = 0x8,
4250     PriorityFlag = 0x20,
4251     DetachableFlag = 0x40,
4252   };
4253   unsigned Flags = Data.Tied ? TiedFlag : 0;
4254   bool NeedsCleanup = false;
4255   if (!Privates.empty()) {
4256     NeedsCleanup =
4257         checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
4258     if (NeedsCleanup)
4259       Flags = Flags | DestructorsFlag;
4260   }
4261   if (Data.Priority.getInt())
4262     Flags = Flags | PriorityFlag;
4263   if (D.hasClausesOfKind<OMPDetachClause>())
4264     Flags = Flags | DetachableFlag;
4265   llvm::Value *TaskFlags =
4266       Data.Final.getPointer()
4267           ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
4268                                      CGF.Builder.getInt32(FinalFlag),
4269                                      CGF.Builder.getInt32(/*C=*/0))
4270           : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
4271   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
4272   llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
4273   SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
4274       getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
4275       SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4276           TaskEntry, KmpRoutineEntryPtrTy)};
4277   llvm::Value *NewTask;
4278   if (D.hasClausesOfKind<OMPNowaitClause>()) {
4279     // Check if we have any device clause associated with the directive.
4280     const Expr *Device = nullptr;
4281     if (auto *C = D.getSingleClause<OMPDeviceClause>())
4282       Device = C->getDevice();
4283     // Emit device ID if any otherwise use default value.
4284     llvm::Value *DeviceID;
4285     if (Device)
4286       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
4287                                            CGF.Int64Ty, /*isSigned=*/true);
4288     else
4289       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
4290     AllocArgs.push_back(DeviceID);
4291     NewTask = CGF.EmitRuntimeCall(
4292         OMPBuilder.getOrCreateRuntimeFunction(
4293             CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
4294         AllocArgs);
4295   } else {
4296     NewTask =
4297         CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4298                                 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
4299                             AllocArgs);
4300   }
4301   // Emit detach clause initialization.
4302   // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
4303   // task_descriptor);
4304   if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
4305     const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
4306     LValue EvtLVal = CGF.EmitLValue(Evt);
4307 
4308     // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
4309     // int gtid, kmp_task_t *task);
4310     llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
4311     llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
4312     Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
4313     llvm::Value *EvtVal = CGF.EmitRuntimeCall(
4314         OMPBuilder.getOrCreateRuntimeFunction(
4315             CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
4316         {Loc, Tid, NewTask});
4317     EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
4318                                       Evt->getExprLoc());
4319     CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
4320   }
4321   // Process affinity clauses.
4322   if (D.hasClausesOfKind<OMPAffinityClause>()) {
4323     // Process list of affinity data.
4324     ASTContext &C = CGM.getContext();
4325     Address AffinitiesArray = Address::invalid();
4326     // Calculate number of elements to form the array of affinity data.
4327     llvm::Value *NumOfElements = nullptr;
4328     unsigned NumAffinities = 0;
4329     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4330       if (const Expr *Modifier = C->getModifier()) {
4331         const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
4332         for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4333           llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4334           Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4335           NumOfElements =
4336               NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
4337         }
4338       } else {
4339         NumAffinities += C->varlist_size();
4340       }
4341     }
4342     getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
4343     // Fields ids in kmp_task_affinity_info record.
4344     enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
4345 
4346     QualType KmpTaskAffinityInfoArrayTy;
4347     if (NumOfElements) {
4348       NumOfElements = CGF.Builder.CreateNUWAdd(
4349           llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
4350       OpaqueValueExpr OVE(
4351           Loc,
4352           C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
4353           VK_RValue);
4354       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4355                                                     RValue::get(NumOfElements));
4356       KmpTaskAffinityInfoArrayTy =
4357           C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
4358                                  /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4359       // Properly emit variable-sized array.
4360       auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
4361                                            ImplicitParamDecl::Other);
4362       CGF.EmitVarDecl(*PD);
4363       AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
4364       NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4365                                                 /*isSigned=*/false);
4366     } else {
4367       KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
4368           KmpTaskAffinityInfoTy,
4369           llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
4370           ArrayType::Normal, /*IndexTypeQuals=*/0);
4371       AffinitiesArray =
4372           CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
4373       AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
4374       NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
4375                                              /*isSigned=*/false);
4376     }
4377 
4378     const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
4379     // Fill array by elements without iterators.
4380     unsigned Pos = 0;
4381     bool HasIterator = false;
4382     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4383       if (C->getModifier()) {
4384         HasIterator = true;
4385         continue;
4386       }
4387       for (const Expr *E : C->varlists()) {
4388         llvm::Value *Addr;
4389         llvm::Value *Size;
4390         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4391         LValue Base =
4392             CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
4393                                KmpTaskAffinityInfoTy);
4394         // affs[i].base_addr = &<Affinities[i].second>;
4395         LValue BaseAddrLVal = CGF.EmitLValueForField(
4396             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4397         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4398                               BaseAddrLVal);
4399         // affs[i].len = sizeof(<Affinities[i].second>);
4400         LValue LenLVal = CGF.EmitLValueForField(
4401             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4402         CGF.EmitStoreOfScalar(Size, LenLVal);
4403         ++Pos;
4404       }
4405     }
4406     LValue PosLVal;
4407     if (HasIterator) {
4408       PosLVal = CGF.MakeAddrLValue(
4409           CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
4410           C.getSizeType());
4411       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4412     }
4413     // Process elements with iterators.
4414     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4415       const Expr *Modifier = C->getModifier();
4416       if (!Modifier)
4417         continue;
4418       OMPIteratorGeneratorScope IteratorScope(
4419           CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
4420       for (const Expr *E : C->varlists()) {
4421         llvm::Value *Addr;
4422         llvm::Value *Size;
4423         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4424         llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4425         LValue Base = CGF.MakeAddrLValue(
4426             Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
4427                     AffinitiesArray.getAlignment()),
4428             KmpTaskAffinityInfoTy);
4429         // affs[i].base_addr = &<Affinities[i].second>;
4430         LValue BaseAddrLVal = CGF.EmitLValueForField(
4431             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4432         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4433                               BaseAddrLVal);
4434         // affs[i].len = sizeof(<Affinities[i].second>);
4435         LValue LenLVal = CGF.EmitLValueForField(
4436             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4437         CGF.EmitStoreOfScalar(Size, LenLVal);
4438         Idx = CGF.Builder.CreateNUWAdd(
4439             Idx, llvm::ConstantInt::get(Idx->getType(), 1));
4440         CGF.EmitStoreOfScalar(Idx, PosLVal);
4441       }
4442     }
4443     // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
4444     // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
4445     // naffins, kmp_task_affinity_info_t *affin_list);
4446     llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
4447     llvm::Value *GTid = getThreadID(CGF, Loc);
4448     llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4449         AffinitiesArray.getPointer(), CGM.VoidPtrTy);
4450     // FIXME: Emit the function and ignore its result for now unless the
4451     // runtime function is properly implemented.
4452     (void)CGF.EmitRuntimeCall(
4453         OMPBuilder.getOrCreateRuntimeFunction(
4454             CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
4455         {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
4456   }
4457   llvm::Value *NewTaskNewTaskTTy =
4458       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4459           NewTask, KmpTaskTWithPrivatesPtrTy);
4460   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4461                                                KmpTaskTWithPrivatesQTy);
4462   LValue TDBase =
4463       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4464   // Fill the data in the resulting kmp_task_t record.
4465   // Copy shareds if there are any.
4466   Address KmpTaskSharedsPtr = Address::invalid();
4467   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4468     KmpTaskSharedsPtr =
4469         Address(CGF.EmitLoadOfScalar(
4470                     CGF.EmitLValueForField(
4471                         TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
4472                                            KmpTaskTShareds)),
4473                     Loc),
4474                 CGM.getNaturalTypeAlignment(SharedsTy));
4475     LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4476     LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4477     CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4478   }
4479   // Emit initial values for private copies (if any).
4480   TaskResultTy Result;
4481   if (!Privates.empty()) {
4482     emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4483                      SharedsTy, SharedsPtrTy, Data, Privates,
4484                      /*ForDup=*/false);
4485     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4486         (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4487       Result.TaskDupFn = emitTaskDupFunction(
4488           CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4489           KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4490           /*WithLastIter=*/!Data.LastprivateVars.empty());
4491     }
4492   }
4493   // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4494   enum { Priority = 0, Destructors = 1 };
4495   // Provide pointer to function with destructors for privates.
4496   auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4497   const RecordDecl *KmpCmplrdataUD =
4498       (*FI)->getType()->getAsUnionType()->getDecl();
4499   if (NeedsCleanup) {
4500     llvm::Value *DestructorFn = emitDestructorsFunction(
4501         CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4502         KmpTaskTWithPrivatesQTy);
4503     LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4504     LValue DestructorsLV = CGF.EmitLValueForField(
4505         Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4506     CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4507                               DestructorFn, KmpRoutineEntryPtrTy),
4508                           DestructorsLV);
4509   }
4510   // Set priority.
4511   if (Data.Priority.getInt()) {
4512     LValue Data2LV = CGF.EmitLValueForField(
4513         TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4514     LValue PriorityLV = CGF.EmitLValueForField(
4515         Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4516     CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4517   }
4518   Result.NewTask = NewTask;
4519   Result.TaskEntry = TaskEntry;
4520   Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4521   Result.TDBase = TDBase;
4522   Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4523   return Result;
4524 }
4525 
namespace {
/// Dependence kinds in the encoding stored into the flags field of a
/// kmp_depend_info element.
enum RTLDependenceKindTy {
  /// 'in' dependence.
  DepIn = 0x1,
  /// 'out' and 'inout' dependences (both use this value).
  DepInOut = 0x3,
  /// 'mutexinoutset' dependence.
  DepMutexInOutSet = 0x4
};
/// Indices of the fields of the kmp_depend_info record, in declaration order.
enum RTLDependInfoFieldsTy {
  /// Base address field.
  BaseAddr,
  /// Length field.
  Len,
  /// Flags field.
  Flags
};
} // namespace
4536 
4537 /// Translates internal dependency kind into the runtime kind.
4538 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4539   RTLDependenceKindTy DepKind;
4540   switch (K) {
4541   case OMPC_DEPEND_in:
4542     DepKind = DepIn;
4543     break;
4544   // Out and InOut dependencies must use the same code.
4545   case OMPC_DEPEND_out:
4546   case OMPC_DEPEND_inout:
4547     DepKind = DepInOut;
4548     break;
4549   case OMPC_DEPEND_mutexinoutset:
4550     DepKind = DepMutexInOutSet;
4551     break;
4552   case OMPC_DEPEND_source:
4553   case OMPC_DEPEND_sink:
4554   case OMPC_DEPEND_depobj:
4555   case OMPC_DEPEND_unknown:
4556     llvm_unreachable("Unknown task dependence type");
4557   }
4558   return DepKind;
4559 }
4560 
4561 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4562 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4563                            QualType &FlagsTy) {
4564   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4565   if (KmpDependInfoTy.isNull()) {
4566     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4567     KmpDependInfoRD->startDefinition();
4568     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4569     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4570     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4571     KmpDependInfoRD->completeDefinition();
4572     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4573   }
4574 }
4575 
4576 std::pair<llvm::Value *, LValue>
4577 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4578                                    SourceLocation Loc) {
4579   ASTContext &C = CGM.getContext();
4580   QualType FlagsTy;
4581   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4582   RecordDecl *KmpDependInfoRD =
4583       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4584   LValue Base = CGF.EmitLoadOfPointerLValue(
4585       DepobjLVal.getAddress(CGF),
4586       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4587   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4588   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4589           Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
4590   Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4591                             Base.getTBAAInfo());
4592   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4593       Addr.getPointer(),
4594       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4595   LValue NumDepsBase = CGF.MakeAddrLValue(
4596       Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4597       Base.getBaseInfo(), Base.getTBAAInfo());
4598   // NumDeps = deps[i].base_addr;
4599   LValue BaseAddrLVal = CGF.EmitLValueForField(
4600       NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4601   llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4602   return std::make_pair(NumDeps, Base);
4603 }
4604 
4605 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4606                            llvm::PointerUnion<unsigned *, LValue *> Pos,
4607                            const OMPTaskDataTy::DependData &Data,
4608                            Address DependenciesArray) {
4609   CodeGenModule &CGM = CGF.CGM;
4610   ASTContext &C = CGM.getContext();
4611   QualType FlagsTy;
4612   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4613   RecordDecl *KmpDependInfoRD =
4614       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4615   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4616 
4617   OMPIteratorGeneratorScope IteratorScope(
4618       CGF, cast_or_null<OMPIteratorExpr>(
4619                Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4620                                  : nullptr));
4621   for (const Expr *E : Data.DepExprs) {
4622     llvm::Value *Addr;
4623     llvm::Value *Size;
4624     std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4625     LValue Base;
4626     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4627       Base = CGF.MakeAddrLValue(
4628           CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4629     } else {
4630       LValue &PosLVal = *Pos.get<LValue *>();
4631       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4632       Base = CGF.MakeAddrLValue(
4633           Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
4634                   DependenciesArray.getAlignment()),
4635           KmpDependInfoTy);
4636     }
4637     // deps[i].base_addr = &<Dependencies[i].second>;
4638     LValue BaseAddrLVal = CGF.EmitLValueForField(
4639         Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4640     CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4641                           BaseAddrLVal);
4642     // deps[i].len = sizeof(<Dependencies[i].second>);
4643     LValue LenLVal = CGF.EmitLValueForField(
4644         Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4645     CGF.EmitStoreOfScalar(Size, LenLVal);
4646     // deps[i].flags = <Dependencies[i].first>;
4647     RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4648     LValue FlagsLVal = CGF.EmitLValueForField(
4649         Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4650     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4651                           FlagsLVal);
4652     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4653       ++(*P);
4654     } else {
4655       LValue &PosLVal = *Pos.get<LValue *>();
4656       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4657       Idx = CGF.Builder.CreateNUWAdd(Idx,
4658                                      llvm::ConstantInt::get(Idx->getType(), 1));
4659       CGF.EmitStoreOfScalar(Idx, PosLVal);
4660     }
4661   }
4662 }
4663 
4664 static SmallVector<llvm::Value *, 4>
4665 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4666                         const OMPTaskDataTy::DependData &Data) {
4667   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4668          "Expected depobj dependecy kind.");
4669   SmallVector<llvm::Value *, 4> Sizes;
4670   SmallVector<LValue, 4> SizeLVals;
4671   ASTContext &C = CGF.getContext();
4672   QualType FlagsTy;
4673   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4674   RecordDecl *KmpDependInfoRD =
4675       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4676   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4677   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4678   {
4679     OMPIteratorGeneratorScope IteratorScope(
4680         CGF, cast_or_null<OMPIteratorExpr>(
4681                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4682                                    : nullptr));
4683     for (const Expr *E : Data.DepExprs) {
4684       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4685       LValue Base = CGF.EmitLoadOfPointerLValue(
4686           DepobjLVal.getAddress(CGF),
4687           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4688       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4689           Base.getAddress(CGF), KmpDependInfoPtrT);
4690       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4691                                 Base.getTBAAInfo());
4692       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4693           Addr.getPointer(),
4694           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4695       LValue NumDepsBase = CGF.MakeAddrLValue(
4696           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4697           Base.getBaseInfo(), Base.getTBAAInfo());
4698       // NumDeps = deps[i].base_addr;
4699       LValue BaseAddrLVal = CGF.EmitLValueForField(
4700           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4701       llvm::Value *NumDeps =
4702           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4703       LValue NumLVal = CGF.MakeAddrLValue(
4704           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4705           C.getUIntPtrType());
4706       CGF.InitTempAlloca(NumLVal.getAddress(CGF),
4707                          llvm::ConstantInt::get(CGF.IntPtrTy, 0));
4708       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4709       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4710       CGF.EmitStoreOfScalar(Add, NumLVal);
4711       SizeLVals.push_back(NumLVal);
4712     }
4713   }
4714   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4715     llvm::Value *Size =
4716         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4717     Sizes.push_back(Size);
4718   }
4719   return Sizes;
4720 }
4721 
4722 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4723                                LValue PosLVal,
4724                                const OMPTaskDataTy::DependData &Data,
4725                                Address DependenciesArray) {
4726   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4727          "Expected depobj dependecy kind.");
4728   ASTContext &C = CGF.getContext();
4729   QualType FlagsTy;
4730   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4731   RecordDecl *KmpDependInfoRD =
4732       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4733   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4734   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4735   llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4736   {
4737     OMPIteratorGeneratorScope IteratorScope(
4738         CGF, cast_or_null<OMPIteratorExpr>(
4739                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4740                                    : nullptr));
4741     for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4742       const Expr *E = Data.DepExprs[I];
4743       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4744       LValue Base = CGF.EmitLoadOfPointerLValue(
4745           DepobjLVal.getAddress(CGF),
4746           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4747       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4748           Base.getAddress(CGF), KmpDependInfoPtrT);
4749       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4750                                 Base.getTBAAInfo());
4751 
4752       // Get number of elements in a single depobj.
4753       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4754           Addr.getPointer(),
4755           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4756       LValue NumDepsBase = CGF.MakeAddrLValue(
4757           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4758           Base.getBaseInfo(), Base.getTBAAInfo());
4759       // NumDeps = deps[i].base_addr;
4760       LValue BaseAddrLVal = CGF.EmitLValueForField(
4761           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4762       llvm::Value *NumDeps =
4763           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4764 
4765       // memcopy dependency data.
4766       llvm::Value *Size = CGF.Builder.CreateNUWMul(
4767           ElSize,
4768           CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4769       llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4770       Address DepAddr =
4771           Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
4772                   DependenciesArray.getAlignment());
4773       CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4774 
4775       // Increase pos.
4776       // pos += size;
4777       llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4778       CGF.EmitStoreOfScalar(Add, PosLVal);
4779     }
4780   }
4781 }
4782 
4783 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4784     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4785     SourceLocation Loc) {
4786   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4787         return D.DepExprs.empty();
4788       }))
4789     return std::make_pair(nullptr, Address::invalid());
4790   // Process list of dependencies.
4791   ASTContext &C = CGM.getContext();
4792   Address DependenciesArray = Address::invalid();
4793   llvm::Value *NumOfElements = nullptr;
4794   unsigned NumDependencies = std::accumulate(
4795       Dependencies.begin(), Dependencies.end(), 0,
4796       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4797         return D.DepKind == OMPC_DEPEND_depobj
4798                    ? V
4799                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4800       });
4801   QualType FlagsTy;
4802   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4803   bool HasDepobjDeps = false;
4804   bool HasRegularWithIterators = false;
4805   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4806   llvm::Value *NumOfRegularWithIterators =
4807       llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4808   // Calculate number of depobj dependecies and regular deps with the iterators.
4809   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4810     if (D.DepKind == OMPC_DEPEND_depobj) {
4811       SmallVector<llvm::Value *, 4> Sizes =
4812           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4813       for (llvm::Value *Size : Sizes) {
4814         NumOfDepobjElements =
4815             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4816       }
4817       HasDepobjDeps = true;
4818       continue;
4819     }
4820     // Include number of iterations, if any.
4821     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4822       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4823         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4824         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4825         NumOfRegularWithIterators =
4826             CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4827       }
4828       HasRegularWithIterators = true;
4829       continue;
4830     }
4831   }
4832 
4833   QualType KmpDependInfoArrayTy;
4834   if (HasDepobjDeps || HasRegularWithIterators) {
4835     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4836                                            /*isSigned=*/false);
4837     if (HasDepobjDeps) {
4838       NumOfElements =
4839           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4840     }
4841     if (HasRegularWithIterators) {
4842       NumOfElements =
4843           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4844     }
4845     OpaqueValueExpr OVE(Loc,
4846                         C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4847                         VK_RValue);
4848     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4849                                                   RValue::get(NumOfElements));
4850     KmpDependInfoArrayTy =
4851         C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
4852                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4853     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4854     // Properly emit variable-sized array.
4855     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4856                                          ImplicitParamDecl::Other);
4857     CGF.EmitVarDecl(*PD);
4858     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4859     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4860                                               /*isSigned=*/false);
4861   } else {
4862     KmpDependInfoArrayTy = C.getConstantArrayType(
4863         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4864         ArrayType::Normal, /*IndexTypeQuals=*/0);
4865     DependenciesArray =
4866         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4867     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4868     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4869                                            /*isSigned=*/false);
4870   }
4871   unsigned Pos = 0;
4872   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4873     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4874         Dependencies[I].IteratorExpr)
4875       continue;
4876     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4877                    DependenciesArray);
4878   }
4879   // Copy regular dependecies with iterators.
4880   LValue PosLVal = CGF.MakeAddrLValue(
4881       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4882   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4883   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4884     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4885         !Dependencies[I].IteratorExpr)
4886       continue;
4887     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4888                    DependenciesArray);
4889   }
4890   // Copy final depobj arrays without iterators.
4891   if (HasDepobjDeps) {
4892     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4893       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4894         continue;
4895       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4896                          DependenciesArray);
4897     }
4898   }
4899   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4900       DependenciesArray, CGF.VoidPtrTy);
4901   return std::make_pair(NumOfElements, DependenciesArray);
4902 }
4903 
4904 Address CGOpenMPRuntime::emitDepobjDependClause(
4905     CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4906     SourceLocation Loc) {
4907   if (Dependencies.DepExprs.empty())
4908     return Address::invalid();
4909   // Process list of dependencies.
4910   ASTContext &C = CGM.getContext();
4911   Address DependenciesArray = Address::invalid();
4912   unsigned NumDependencies = Dependencies.DepExprs.size();
4913   QualType FlagsTy;
4914   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4915   RecordDecl *KmpDependInfoRD =
4916       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4917 
4918   llvm::Value *Size;
4919   // Define type kmp_depend_info[<Dependencies.size()>];
4920   // For depobj reserve one extra element to store the number of elements.
4921   // It is required to handle depobj(x) update(in) construct.
4922   // kmp_depend_info[<Dependencies.size()>] deps;
4923   llvm::Value *NumDepsVal;
4924   CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4925   if (const auto *IE =
4926           cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4927     NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4928     for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4929       llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4930       Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4931       NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4932     }
4933     Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4934                                     NumDepsVal);
4935     CharUnits SizeInBytes =
4936         C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4937     llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4938     Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4939     NumDepsVal =
4940         CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4941   } else {
4942     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4943         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4944         nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
4945     CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4946     Size = CGM.getSize(Sz.alignTo(Align));
4947     NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4948   }
4949   // Need to allocate on the dynamic memory.
4950   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4951   // Use default allocator.
4952   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4953   llvm::Value *Args[] = {ThreadID, Size, Allocator};
4954 
4955   llvm::Value *Addr =
4956       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4957                               CGM.getModule(), OMPRTL___kmpc_alloc),
4958                           Args, ".dep.arr.addr");
4959   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4960       Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
4961   DependenciesArray = Address(Addr, Align);
4962   // Write number of elements in the first element of array for depobj.
4963   LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4964   // deps[i].base_addr = NumDependencies;
4965   LValue BaseAddrLVal = CGF.EmitLValueForField(
4966       Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4967   CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4968   llvm::PointerUnion<unsigned *, LValue *> Pos;
4969   unsigned Idx = 1;
4970   LValue PosLVal;
4971   if (Dependencies.IteratorExpr) {
4972     PosLVal = CGF.MakeAddrLValue(
4973         CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4974         C.getSizeType());
4975     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4976                           /*IsInit=*/true);
4977     Pos = &PosLVal;
4978   } else {
4979     Pos = &Idx;
4980   }
4981   emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4982   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4983       CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
4984   return DependenciesArray;
4985 }
4986 
4987 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4988                                         SourceLocation Loc) {
4989   ASTContext &C = CGM.getContext();
4990   QualType FlagsTy;
4991   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4992   LValue Base = CGF.EmitLoadOfPointerLValue(
4993       DepobjLVal.getAddress(CGF),
4994       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4995   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4996   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4997       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
4998   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4999       Addr.getPointer(),
5000       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5001   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5002                                                                CGF.VoidPtrTy);
5003   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5004   // Use default allocator.
5005   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5006   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5007 
5008   // _kmpc_free(gtid, addr, nullptr);
5009   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5010                                 CGM.getModule(), OMPRTL___kmpc_free),
5011                             Args);
5012 }
5013 
5014 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
5015                                        OpenMPDependClauseKind NewDepKind,
5016                                        SourceLocation Loc) {
5017   ASTContext &C = CGM.getContext();
5018   QualType FlagsTy;
5019   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5020   RecordDecl *KmpDependInfoRD =
5021       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5022   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5023   llvm::Value *NumDeps;
5024   LValue Base;
5025   std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
5026 
5027   Address Begin = Base.getAddress(CGF);
5028   // Cast from pointer to array type to pointer to single element.
5029   llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
5030   // The basic structure here is a while-do loop.
5031   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
5032   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
5033   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5034   CGF.EmitBlock(BodyBB);
5035   llvm::PHINode *ElementPHI =
5036       CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
5037   ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
5038   Begin = Address(ElementPHI, Begin.getAlignment());
5039   Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
5040                             Base.getTBAAInfo());
5041   // deps[i].flags = NewDepKind;
5042   RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
5043   LValue FlagsLVal = CGF.EmitLValueForField(
5044       Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5045   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5046                         FlagsLVal);
5047 
5048   // Shift the address forward by one element.
5049   Address ElementNext =
5050       CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
5051   ElementPHI->addIncoming(ElementNext.getPointer(),
5052                           CGF.Builder.GetInsertBlock());
5053   llvm::Value *IsEmpty =
5054       CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
5055   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5056   // Done.
5057   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5058 }
5059 
5060 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5061                                    const OMPExecutableDirective &D,
5062                                    llvm::Function *TaskFunction,
5063                                    QualType SharedsTy, Address Shareds,
5064                                    const Expr *IfCond,
5065                                    const OMPTaskDataTy &Data) {
5066   if (!CGF.HaveInsertPoint())
5067     return;
5068 
5069   TaskResultTy Result =
5070       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5071   llvm::Value *NewTask = Result.NewTask;
5072   llvm::Function *TaskEntry = Result.TaskEntry;
5073   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5074   LValue TDBase = Result.TDBase;
5075   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5076   // Process list of dependences.
5077   Address DependenciesArray = Address::invalid();
5078   llvm::Value *NumOfElements;
5079   std::tie(NumOfElements, DependenciesArray) =
5080       emitDependClause(CGF, Data.Dependences, Loc);
5081 
5082   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5083   // libcall.
5084   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5085   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5086   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5087   // list is not empty
5088   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5089   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5090   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5091   llvm::Value *DepTaskArgs[7];
5092   if (!Data.Dependences.empty()) {
5093     DepTaskArgs[0] = UpLoc;
5094     DepTaskArgs[1] = ThreadID;
5095     DepTaskArgs[2] = NewTask;
5096     DepTaskArgs[3] = NumOfElements;
5097     DepTaskArgs[4] = DependenciesArray.getPointer();
5098     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5099     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5100   }
5101   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
5102                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5103     if (!Data.Tied) {
5104       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5105       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5106       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5107     }
5108     if (!Data.Dependences.empty()) {
5109       CGF.EmitRuntimeCall(
5110           OMPBuilder.getOrCreateRuntimeFunction(
5111               CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
5112           DepTaskArgs);
5113     } else {
5114       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5115                               CGM.getModule(), OMPRTL___kmpc_omp_task),
5116                           TaskArgs);
5117     }
5118     // Check if parent region is untied and build return for untied task;
5119     if (auto *Region =
5120             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5121       Region->emitUntiedSwitch(CGF);
5122   };
5123 
5124   llvm::Value *DepWaitTaskArgs[6];
5125   if (!Data.Dependences.empty()) {
5126     DepWaitTaskArgs[0] = UpLoc;
5127     DepWaitTaskArgs[1] = ThreadID;
5128     DepWaitTaskArgs[2] = NumOfElements;
5129     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5130     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5131     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5132   }
5133   auto &M = CGM.getModule();
5134   auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
5135                         TaskEntry, &Data, &DepWaitTaskArgs,
5136                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5137     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5138     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5139     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5140     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5141     // is specified.
5142     if (!Data.Dependences.empty())
5143       CGF.EmitRuntimeCall(
5144           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
5145           DepWaitTaskArgs);
5146     // Call proxy_task_entry(gtid, new_task);
5147     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5148                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5149       Action.Enter(CGF);
5150       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5151       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5152                                                           OutlinedFnArgs);
5153     };
5154 
5155     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5156     // kmp_task_t *new_task);
5157     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5158     // kmp_task_t *new_task);
5159     RegionCodeGenTy RCG(CodeGen);
5160     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
5161                               M, OMPRTL___kmpc_omp_task_begin_if0),
5162                           TaskArgs,
5163                           OMPBuilder.getOrCreateRuntimeFunction(
5164                               M, OMPRTL___kmpc_omp_task_complete_if0),
5165                           TaskArgs);
5166     RCG.setAction(Action);
5167     RCG(CGF);
5168   };
5169 
5170   if (IfCond) {
5171     emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5172   } else {
5173     RegionCodeGenTy ThenRCG(ThenCodeGen);
5174     ThenRCG(CGF);
5175   }
5176 }
5177 
5178 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5179                                        const OMPLoopDirective &D,
5180                                        llvm::Function *TaskFunction,
5181                                        QualType SharedsTy, Address Shareds,
5182                                        const Expr *IfCond,
5183                                        const OMPTaskDataTy &Data) {
5184   if (!CGF.HaveInsertPoint())
5185     return;
5186   TaskResultTy Result =
5187       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5188   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5189   // libcall.
5190   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5191   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5192   // sched, kmp_uint64 grainsize, void *task_dup);
5193   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5194   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5195   llvm::Value *IfVal;
5196   if (IfCond) {
5197     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5198                                       /*isSigned=*/true);
5199   } else {
5200     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5201   }
5202 
5203   LValue LBLVal = CGF.EmitLValueForField(
5204       Result.TDBase,
5205       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5206   const auto *LBVar =
5207       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5208   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5209                        LBLVal.getQuals(),
5210                        /*IsInitializer=*/true);
5211   LValue UBLVal = CGF.EmitLValueForField(
5212       Result.TDBase,
5213       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5214   const auto *UBVar =
5215       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5216   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5217                        UBLVal.getQuals(),
5218                        /*IsInitializer=*/true);
5219   LValue StLVal = CGF.EmitLValueForField(
5220       Result.TDBase,
5221       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5222   const auto *StVar =
5223       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5224   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5225                        StLVal.getQuals(),
5226                        /*IsInitializer=*/true);
5227   // Store reductions address.
5228   LValue RedLVal = CGF.EmitLValueForField(
5229       Result.TDBase,
5230       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5231   if (Data.Reductions) {
5232     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5233   } else {
5234     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5235                                CGF.getContext().VoidPtrTy);
5236   }
5237   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5238   llvm::Value *TaskArgs[] = {
5239       UpLoc,
5240       ThreadID,
5241       Result.NewTask,
5242       IfVal,
5243       LBLVal.getPointer(CGF),
5244       UBLVal.getPointer(CGF),
5245       CGF.EmitLoadOfScalar(StLVal, Loc),
5246       llvm::ConstantInt::getSigned(
5247           CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5248       llvm::ConstantInt::getSigned(
5249           CGF.IntTy, Data.Schedule.getPointer()
5250                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5251                          : NoSchedule),
5252       Data.Schedule.getPointer()
5253           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5254                                       /*isSigned=*/false)
5255           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5256       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5257                              Result.TaskDupFn, CGF.VoidPtrTy)
5258                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5259   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5260                           CGM.getModule(), OMPRTL___kmpc_taskloop),
5261                       TaskArgs);
5262 }
5263 
5264 /// Emit reduction operation for each element of array (required for
5265 /// array sections) LHS op = RHS.
5266 /// \param Type Type of array.
5267 /// \param LHSVar Variable on the left side of the reduction operation
5268 /// (references element of array in original variable).
5269 /// \param RHSVar Variable on the right side of the reduction operation
5270 /// (references element of array in original variable).
5271 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5272 /// RHSVar.
5273 static void EmitOMPAggregateReduction(
5274     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5275     const VarDecl *RHSVar,
5276     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5277                                   const Expr *, const Expr *)> &RedOpGen,
5278     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5279     const Expr *UpExpr = nullptr) {
5280   // Perform element-by-element initialization.
5281   QualType ElementTy;
5282   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5283   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5284 
5285   // Drill down to the base element type on both arrays.
5286   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5287   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5288 
5289   llvm::Value *RHSBegin = RHSAddr.getPointer();
5290   llvm::Value *LHSBegin = LHSAddr.getPointer();
5291   // Cast from pointer to array type to pointer to single element.
5292   llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
5293   // The basic structure here is a while-do loop.
5294   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5295   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5296   llvm::Value *IsEmpty =
5297       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5298   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5299 
5300   // Enter the loop body, making that address the current address.
5301   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5302   CGF.EmitBlock(BodyBB);
5303 
5304   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5305 
5306   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5307       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5308   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5309   Address RHSElementCurrent =
5310       Address(RHSElementPHI,
5311               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5312 
5313   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5314       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5315   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5316   Address LHSElementCurrent =
5317       Address(LHSElementPHI,
5318               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5319 
5320   // Emit copy.
5321   CodeGenFunction::OMPPrivateScope Scope(CGF);
5322   Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5323   Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5324   Scope.Privatize();
5325   RedOpGen(CGF, XExpr, EExpr, UpExpr);
5326   Scope.ForceCleanup();
5327 
5328   // Shift the address forward by one element.
5329   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5330       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
5331   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5332       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
5333   // Check whether we've reached the end.
5334   llvm::Value *Done =
5335       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5336   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5337   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5338   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5339 
5340   // Done.
5341   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5342 }
5343 
5344 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5345 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5346 /// UDR combiner function.
5347 static void emitReductionCombiner(CodeGenFunction &CGF,
5348                                   const Expr *ReductionOp) {
5349   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5350     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5351       if (const auto *DRE =
5352               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5353         if (const auto *DRD =
5354                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5355           std::pair<llvm::Function *, llvm::Function *> Reduction =
5356               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5357           RValue Func = RValue::get(Reduction.first);
5358           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5359           CGF.EmitIgnoredExpr(ReductionOp);
5360           return;
5361         }
5362   CGF.EmitIgnoredExpr(ReductionOp);
5363 }
5364 
5365 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5366     SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5367     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5368     ArrayRef<const Expr *> ReductionOps) {
5369   ASTContext &C = CGM.getContext();
5370 
5371   // void reduction_func(void *LHSArg, void *RHSArg);
5372   FunctionArgList Args;
5373   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5374                            ImplicitParamDecl::Other);
5375   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5376                            ImplicitParamDecl::Other);
5377   Args.push_back(&LHSArg);
5378   Args.push_back(&RHSArg);
5379   const auto &CGFI =
5380       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5381   std::string Name = getName({"omp", "reduction", "reduction_func"});
5382   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5383                                     llvm::GlobalValue::InternalLinkage, Name,
5384                                     &CGM.getModule());
5385   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5386   Fn->setDoesNotRecurse();
5387   CodeGenFunction CGF(CGM);
5388   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5389 
5390   // Dst = (void*[n])(LHSArg);
5391   // Src = (void*[n])(RHSArg);
5392   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5393       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5394       ArgsType), CGF.getPointerAlign());
5395   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5396       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5397       ArgsType), CGF.getPointerAlign());
5398 
5399   //  ...
5400   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5401   //  ...
5402   CodeGenFunction::OMPPrivateScope Scope(CGF);
5403   auto IPriv = Privates.begin();
5404   unsigned Idx = 0;
5405   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5406     const auto *RHSVar =
5407         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5408     Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5409       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5410     });
5411     const auto *LHSVar =
5412         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5413     Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5414       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5415     });
5416     QualType PrivTy = (*IPriv)->getType();
5417     if (PrivTy->isVariablyModifiedType()) {
5418       // Get array size and emit VLA type.
5419       ++Idx;
5420       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5421       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5422       const VariableArrayType *VLA =
5423           CGF.getContext().getAsVariableArrayType(PrivTy);
5424       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5425       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5426           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5427       CGF.EmitVariablyModifiedType(PrivTy);
5428     }
5429   }
5430   Scope.Privatize();
5431   IPriv = Privates.begin();
5432   auto ILHS = LHSExprs.begin();
5433   auto IRHS = RHSExprs.begin();
5434   for (const Expr *E : ReductionOps) {
5435     if ((*IPriv)->getType()->isArrayType()) {
5436       // Emit reduction for array section.
5437       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5438       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5439       EmitOMPAggregateReduction(
5440           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5441           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5442             emitReductionCombiner(CGF, E);
5443           });
5444     } else {
5445       // Emit reduction for array subscript or single variable.
5446       emitReductionCombiner(CGF, E);
5447     }
5448     ++IPriv;
5449     ++ILHS;
5450     ++IRHS;
5451   }
5452   Scope.ForceCleanup();
5453   CGF.FinishFunction();
5454   return Fn;
5455 }
5456 
5457 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5458                                                   const Expr *ReductionOp,
5459                                                   const Expr *PrivateRef,
5460                                                   const DeclRefExpr *LHS,
5461                                                   const DeclRefExpr *RHS) {
5462   if (PrivateRef->getType()->isArrayType()) {
5463     // Emit reduction for array section.
5464     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5465     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5466     EmitOMPAggregateReduction(
5467         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5468         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5469           emitReductionCombiner(CGF, ReductionOp);
5470         });
5471   } else {
5472     // Emit reduction for array subscript or single variable.
5473     emitReductionCombiner(CGF, ReductionOp);
5474   }
5475 }
5476 
5477 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5478                                     ArrayRef<const Expr *> Privates,
5479                                     ArrayRef<const Expr *> LHSExprs,
5480                                     ArrayRef<const Expr *> RHSExprs,
5481                                     ArrayRef<const Expr *> ReductionOps,
5482                                     ReductionOptionsTy Options) {
5483   if (!CGF.HaveInsertPoint())
5484     return;
5485 
5486   bool WithNowait = Options.WithNowait;
5487   bool SimpleReduction = Options.SimpleReduction;
5488 
5489   // Next code should be emitted for reduction:
5490   //
5491   // static kmp_critical_name lock = { 0 };
5492   //
5493   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5494   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5495   //  ...
5496   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5497   //  *(Type<n>-1*)rhs[<n>-1]);
5498   // }
5499   //
5500   // ...
5501   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5502   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5503   // RedList, reduce_func, &<lock>)) {
5504   // case 1:
5505   //  ...
5506   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5507   //  ...
5508   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5509   // break;
5510   // case 2:
5511   //  ...
5512   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5513   //  ...
5514   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5515   // break;
5516   // default:;
5517   // }
5518   //
5519   // if SimpleReduction is true, only the next code is generated:
5520   //  ...
5521   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5522   //  ...
5523 
5524   ASTContext &C = CGM.getContext();
5525 
5526   if (SimpleReduction) {
5527     CodeGenFunction::RunCleanupsScope Scope(CGF);
5528     auto IPriv = Privates.begin();
5529     auto ILHS = LHSExprs.begin();
5530     auto IRHS = RHSExprs.begin();
5531     for (const Expr *E : ReductionOps) {
5532       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5533                                   cast<DeclRefExpr>(*IRHS));
5534       ++IPriv;
5535       ++ILHS;
5536       ++IRHS;
5537     }
5538     return;
5539   }
5540 
5541   // 1. Build a list of reduction variables.
5542   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5543   auto Size = RHSExprs.size();
5544   for (const Expr *E : Privates) {
5545     if (E->getType()->isVariablyModifiedType())
5546       // Reserve place for array size.
5547       ++Size;
5548   }
5549   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5550   QualType ReductionArrayTy =
5551       C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
5552                              /*IndexTypeQuals=*/0);
5553   Address ReductionList =
5554       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5555   auto IPriv = Privates.begin();
5556   unsigned Idx = 0;
5557   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5558     Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5559     CGF.Builder.CreateStore(
5560         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5561             CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5562         Elem);
5563     if ((*IPriv)->getType()->isVariablyModifiedType()) {
5564       // Store array size.
5565       ++Idx;
5566       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5567       llvm::Value *Size = CGF.Builder.CreateIntCast(
5568           CGF.getVLASize(
5569                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5570               .NumElts,
5571           CGF.SizeTy, /*isSigned=*/false);
5572       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5573                               Elem);
5574     }
5575   }
5576 
5577   // 2. Emit reduce_func().
5578   llvm::Function *ReductionFn = emitReductionFunction(
5579       Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
5580       LHSExprs, RHSExprs, ReductionOps);
5581 
5582   // 3. Create static kmp_critical_name lock = { 0 };
5583   std::string Name = getName({"reduction"});
5584   llvm::Value *Lock = getCriticalRegionLock(Name);
5585 
5586   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5587   // RedList, reduce_func, &<lock>);
5588   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5589   llvm::Value *ThreadId = getThreadID(CGF, Loc);
5590   llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5591   llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5592       ReductionList.getPointer(), CGF.VoidPtrTy);
5593   llvm::Value *Args[] = {
5594       IdentTLoc,                             // ident_t *<loc>
5595       ThreadId,                              // i32 <gtid>
5596       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5597       ReductionArrayTySize,                  // size_type sizeof(RedList)
5598       RL,                                    // void *RedList
5599       ReductionFn, // void (*) (void *, void *) <reduce_func>
5600       Lock         // kmp_critical_name *&<lock>
5601   };
5602   llvm::Value *Res = CGF.EmitRuntimeCall(
5603       OMPBuilder.getOrCreateRuntimeFunction(
5604           CGM.getModule(),
5605           WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5606       Args);
5607 
5608   // 5. Build switch(res)
5609   llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5610   llvm::SwitchInst *SwInst =
5611       CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5612 
5613   // 6. Build case 1:
5614   //  ...
5615   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5616   //  ...
5617   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5618   // break;
5619   llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5620   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5621   CGF.EmitBlock(Case1BB);
5622 
5623   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5624   llvm::Value *EndArgs[] = {
5625       IdentTLoc, // ident_t *<loc>
5626       ThreadId,  // i32 <gtid>
5627       Lock       // kmp_critical_name *&<lock>
5628   };
5629   auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5630                        CodeGenFunction &CGF, PrePostActionTy &Action) {
5631     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5632     auto IPriv = Privates.begin();
5633     auto ILHS = LHSExprs.begin();
5634     auto IRHS = RHSExprs.begin();
5635     for (const Expr *E : ReductionOps) {
5636       RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5637                                      cast<DeclRefExpr>(*IRHS));
5638       ++IPriv;
5639       ++ILHS;
5640       ++IRHS;
5641     }
5642   };
5643   RegionCodeGenTy RCG(CodeGen);
5644   CommonActionTy Action(
5645       nullptr, llvm::None,
5646       OMPBuilder.getOrCreateRuntimeFunction(
5647           CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5648                                       : OMPRTL___kmpc_end_reduce),
5649       EndArgs);
5650   RCG.setAction(Action);
5651   RCG(CGF);
5652 
5653   CGF.EmitBranch(DefaultBB);
5654 
5655   // 7. Build case 2:
5656   //  ...
5657   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5658   //  ...
5659   // break;
5660   llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5661   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5662   CGF.EmitBlock(Case2BB);
5663 
5664   auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5665                              CodeGenFunction &CGF, PrePostActionTy &Action) {
5666     auto ILHS = LHSExprs.begin();
5667     auto IRHS = RHSExprs.begin();
5668     auto IPriv = Privates.begin();
5669     for (const Expr *E : ReductionOps) {
5670       const Expr *XExpr = nullptr;
5671       const Expr *EExpr = nullptr;
5672       const Expr *UpExpr = nullptr;
5673       BinaryOperatorKind BO = BO_Comma;
5674       if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5675         if (BO->getOpcode() == BO_Assign) {
5676           XExpr = BO->getLHS();
5677           UpExpr = BO->getRHS();
5678         }
5679       }
5680       // Try to emit update expression as a simple atomic.
5681       const Expr *RHSExpr = UpExpr;
5682       if (RHSExpr) {
5683         // Analyze RHS part of the whole expression.
5684         if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5685                 RHSExpr->IgnoreParenImpCasts())) {
5686           // If this is a conditional operator, analyze its condition for
5687           // min/max reduction operator.
5688           RHSExpr = ACO->getCond();
5689         }
5690         if (const auto *BORHS =
5691                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5692           EExpr = BORHS->getRHS();
5693           BO = BORHS->getOpcode();
5694         }
5695       }
5696       if (XExpr) {
5697         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5698         auto &&AtomicRedGen = [BO, VD,
5699                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
5700                                     const Expr *EExpr, const Expr *UpExpr) {
5701           LValue X = CGF.EmitLValue(XExpr);
5702           RValue E;
5703           if (EExpr)
5704             E = CGF.EmitAnyExpr(EExpr);
5705           CGF.EmitOMPAtomicSimpleUpdateExpr(
5706               X, E, BO, /*IsXLHSInRHSPart=*/true,
5707               llvm::AtomicOrdering::Monotonic, Loc,
5708               [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5709                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5710                 PrivateScope.addPrivate(
5711                     VD, [&CGF, VD, XRValue, Loc]() {
5712                       Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5713                       CGF.emitOMPSimpleStore(
5714                           CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5715                           VD->getType().getNonReferenceType(), Loc);
5716                       return LHSTemp;
5717                     });
5718                 (void)PrivateScope.Privatize();
5719                 return CGF.EmitAnyExpr(UpExpr);
5720               });
5721         };
5722         if ((*IPriv)->getType()->isArrayType()) {
5723           // Emit atomic reduction for array section.
5724           const auto *RHSVar =
5725               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5726           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5727                                     AtomicRedGen, XExpr, EExpr, UpExpr);
5728         } else {
5729           // Emit atomic reduction for array subscript or single variable.
5730           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5731         }
5732       } else {
5733         // Emit as a critical region.
5734         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5735                                            const Expr *, const Expr *) {
5736           CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5737           std::string Name = RT.getName({"atomic_reduction"});
5738           RT.emitCriticalRegion(
5739               CGF, Name,
5740               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5741                 Action.Enter(CGF);
5742                 emitReductionCombiner(CGF, E);
5743               },
5744               Loc);
5745         };
5746         if ((*IPriv)->getType()->isArrayType()) {
5747           const auto *LHSVar =
5748               cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5749           const auto *RHSVar =
5750               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5751           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5752                                     CritRedGen);
5753         } else {
5754           CritRedGen(CGF, nullptr, nullptr, nullptr);
5755         }
5756       }
5757       ++ILHS;
5758       ++IRHS;
5759       ++IPriv;
5760     }
5761   };
5762   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5763   if (!WithNowait) {
5764     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5765     llvm::Value *EndArgs[] = {
5766         IdentTLoc, // ident_t *<loc>
5767         ThreadId,  // i32 <gtid>
5768         Lock       // kmp_critical_name *&<lock>
5769     };
5770     CommonActionTy Action(nullptr, llvm::None,
5771                           OMPBuilder.getOrCreateRuntimeFunction(
5772                               CGM.getModule(), OMPRTL___kmpc_end_reduce),
5773                           EndArgs);
5774     AtomicRCG.setAction(Action);
5775     AtomicRCG(CGF);
5776   } else {
5777     AtomicRCG(CGF);
5778   }
5779 
5780   CGF.EmitBranch(DefaultBB);
5781   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5782 }
5783 
5784 /// Generates unique name for artificial threadprivate variables.
5785 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5786 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5787                                       const Expr *Ref) {
5788   SmallString<256> Buffer;
5789   llvm::raw_svector_ostream Out(Buffer);
5790   const clang::DeclRefExpr *DE;
5791   const VarDecl *D = ::getBaseDecl(Ref, DE);
5792   if (!D)
5793     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5794   D = D->getCanonicalDecl();
5795   std::string Name = CGM.getOpenMPRuntime().getName(
5796       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5797   Out << Prefix << Name << "_"
5798       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5799   return std::string(Out.str());
5800 }
5801 
5802 /// Emits reduction initializer function:
5803 /// \code
5804 /// void @.red_init(void* %arg, void* %orig) {
5805 /// %0 = bitcast void* %arg to <type>*
5806 /// store <type> <init>, <type>* %0
5807 /// ret void
5808 /// }
5809 /// \endcode
5810 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5811                                            SourceLocation Loc,
5812                                            ReductionCodeGen &RCG, unsigned N) {
5813   ASTContext &C = CGM.getContext();
5814   QualType VoidPtrTy = C.VoidPtrTy;
5815   VoidPtrTy.addRestrict();
5816   FunctionArgList Args;
5817   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5818                           ImplicitParamDecl::Other);
5819   ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5820                               ImplicitParamDecl::Other);
5821   Args.emplace_back(&Param);
5822   Args.emplace_back(&ParamOrig);
5823   const auto &FnInfo =
5824       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5825   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5826   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5827   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5828                                     Name, &CGM.getModule());
5829   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5830   Fn->setDoesNotRecurse();
5831   CodeGenFunction CGF(CGM);
5832   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5833   Address PrivateAddr = CGF.EmitLoadOfPointer(
5834       CGF.GetAddrOfLocalVar(&Param),
5835       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5836   llvm::Value *Size = nullptr;
5837   // If the size of the reduction item is non-constant, load it from global
5838   // threadprivate variable.
5839   if (RCG.getSizes(N).second) {
5840     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5841         CGF, CGM.getContext().getSizeType(),
5842         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5843     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5844                                 CGM.getContext().getSizeType(), Loc);
5845   }
5846   RCG.emitAggregateType(CGF, N, Size);
5847   LValue OrigLVal;
5848   // If initializer uses initializer from declare reduction construct, emit a
5849   // pointer to the address of the original reduction item (reuired by reduction
5850   // initializer)
5851   if (RCG.usesReductionInitializer(N)) {
5852     Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5853     SharedAddr = CGF.EmitLoadOfPointer(
5854         SharedAddr,
5855         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5856     OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
5857   } else {
5858     OrigLVal = CGF.MakeNaturalAlignAddrLValue(
5859         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
5860         CGM.getContext().VoidPtrTy);
5861   }
5862   // Emit the initializer:
5863   // %0 = bitcast void* %arg to <type>*
5864   // store <type> <init>, <type>* %0
5865   RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
5866                          [](CodeGenFunction &) { return false; });
5867   CGF.FinishFunction();
5868   return Fn;
5869 }
5870 
5871 /// Emits reduction combiner function:
5872 /// \code
5873 /// void @.red_comb(void* %arg0, void* %arg1) {
5874 /// %lhs = bitcast void* %arg0 to <type>*
5875 /// %rhs = bitcast void* %arg1 to <type>*
5876 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5877 /// store <type> %2, <type>* %lhs
5878 /// ret void
5879 /// }
5880 /// \endcode
5881 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5882                                            SourceLocation Loc,
5883                                            ReductionCodeGen &RCG, unsigned N,
5884                                            const Expr *ReductionOp,
5885                                            const Expr *LHS, const Expr *RHS,
5886                                            const Expr *PrivateRef) {
5887   ASTContext &C = CGM.getContext();
5888   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5889   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5890   FunctionArgList Args;
5891   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5892                                C.VoidPtrTy, ImplicitParamDecl::Other);
5893   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5894                             ImplicitParamDecl::Other);
5895   Args.emplace_back(&ParamInOut);
5896   Args.emplace_back(&ParamIn);
5897   const auto &FnInfo =
5898       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5899   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5900   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5901   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5902                                     Name, &CGM.getModule());
5903   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5904   Fn->setDoesNotRecurse();
5905   CodeGenFunction CGF(CGM);
5906   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5907   llvm::Value *Size = nullptr;
5908   // If the size of the reduction item is non-constant, load it from global
5909   // threadprivate variable.
5910   if (RCG.getSizes(N).second) {
5911     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5912         CGF, CGM.getContext().getSizeType(),
5913         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5914     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5915                                 CGM.getContext().getSizeType(), Loc);
5916   }
5917   RCG.emitAggregateType(CGF, N, Size);
5918   // Remap lhs and rhs variables to the addresses of the function arguments.
5919   // %lhs = bitcast void* %arg0 to <type>*
5920   // %rhs = bitcast void* %arg1 to <type>*
5921   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5922   PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
5923     // Pull out the pointer to the variable.
5924     Address PtrAddr = CGF.EmitLoadOfPointer(
5925         CGF.GetAddrOfLocalVar(&ParamInOut),
5926         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5927     return CGF.Builder.CreateElementBitCast(
5928         PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
5929   });
5930   PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
5931     // Pull out the pointer to the variable.
5932     Address PtrAddr = CGF.EmitLoadOfPointer(
5933         CGF.GetAddrOfLocalVar(&ParamIn),
5934         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5935     return CGF.Builder.CreateElementBitCast(
5936         PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
5937   });
5938   PrivateScope.Privatize();
5939   // Emit the combiner body:
5940   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5941   // store <type> %2, <type>* %lhs
5942   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5943       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5944       cast<DeclRefExpr>(RHS));
5945   CGF.FinishFunction();
5946   return Fn;
5947 }
5948 
5949 /// Emits reduction finalizer function:
5950 /// \code
5951 /// void @.red_fini(void* %arg) {
5952 /// %0 = bitcast void* %arg to <type>*
5953 /// <destroy>(<type>* %0)
5954 /// ret void
5955 /// }
5956 /// \endcode
5957 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5958                                            SourceLocation Loc,
5959                                            ReductionCodeGen &RCG, unsigned N) {
5960   if (!RCG.needCleanups(N))
5961     return nullptr;
5962   ASTContext &C = CGM.getContext();
5963   FunctionArgList Args;
5964   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5965                           ImplicitParamDecl::Other);
5966   Args.emplace_back(&Param);
5967   const auto &FnInfo =
5968       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5969   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5970   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5971   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5972                                     Name, &CGM.getModule());
5973   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5974   Fn->setDoesNotRecurse();
5975   CodeGenFunction CGF(CGM);
5976   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5977   Address PrivateAddr = CGF.EmitLoadOfPointer(
5978       CGF.GetAddrOfLocalVar(&Param),
5979       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5980   llvm::Value *Size = nullptr;
5981   // If the size of the reduction item is non-constant, load it from global
5982   // threadprivate variable.
5983   if (RCG.getSizes(N).second) {
5984     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5985         CGF, CGM.getContext().getSizeType(),
5986         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5987     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5988                                 CGM.getContext().getSizeType(), Loc);
5989   }
5990   RCG.emitAggregateType(CGF, N, Size);
5991   // Emit the finalizer body:
5992   // <destroy>(<type>* %0)
5993   RCG.emitCleanups(CGF, N, PrivateAddr);
5994   CGF.FinishFunction(Loc);
5995   return Fn;
5996 }
5997 
/// Emits a local array of kmp_taskred_input_t descriptors, one per reduction
/// item in \p Data.ReductionVars, and the runtime call that consumes it.
///
/// \param LHSExprs, RHSExprs Per-item helper expressions forwarded to the
/// generated combiner function; indexed in parallel with
/// \p Data.ReductionVars.
/// \returns The result of the __kmpc_taskred_modifier_init call (when
/// \p Data.IsReductionWithTaskMod is set) or of the __kmpc_taskred_init call
/// otherwise; nullptr when there is no insertion point or no reduction items.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  // The fields below are added in exactly this order.
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  // The flags field is emitted as an unsigned 32-bit integer.
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  // Number of reduction items == number of descriptors in the array.
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_taskred_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    // Must run before the shared/original lvalues of item Cnt are queried.
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    // Must run before the sizes of item Cnt are queried via getSizes().
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    // A non-null second size value selects delayed creation.
    bool DelayedCreation = !!SizeVal;
    // ElemLVal.reduce_size = <size in chars, converted to size_t>;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    // The finalizer helper may return null; a null pointer is stored then.
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6126 
6127 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6128                                             SourceLocation Loc,
6129                                             bool IsWorksharingReduction) {
6130   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6131   // is_ws, int num, void *data);
6132   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6133   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6134                                                 CGM.IntTy, /*isSigned=*/true);
6135   llvm::Value *Args[] = {IdentTLoc, GTid,
6136                          llvm::ConstantInt::get(CGM.IntTy,
6137                                                 IsWorksharingReduction ? 1 : 0,
6138                                                 /*isSigned=*/true)};
6139   (void)CGF.EmitRuntimeCall(
6140       OMPBuilder.getOrCreateRuntimeFunction(
6141           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6142       Args);
6143 }
6144 
6145 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6146                                               SourceLocation Loc,
6147                                               ReductionCodeGen &RCG,
6148                                               unsigned N) {
6149   auto Sizes = RCG.getSizes(N);
6150   // Emit threadprivate global variable if the type is non-constant
6151   // (Sizes.second = nullptr).
6152   if (Sizes.second) {
6153     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6154                                                      /*isSigned=*/false);
6155     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6156         CGF, CGM.getContext().getSizeType(),
6157         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6158     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6159   }
6160 }
6161 
6162 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6163                                               SourceLocation Loc,
6164                                               llvm::Value *ReductionsPtr,
6165                                               LValue SharedLVal) {
6166   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6167   // *d);
6168   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6169                                                    CGM.IntTy,
6170                                                    /*isSigned=*/true),
6171                          ReductionsPtr,
6172                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6173                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6174   return Address(
6175       CGF.EmitRuntimeCall(
6176           OMPBuilder.getOrCreateRuntimeFunction(
6177               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6178           Args),
6179       SharedLVal.getAlignment());
6180 }
6181 
6182 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6183                                        SourceLocation Loc) {
6184   if (!CGF.HaveInsertPoint())
6185     return;
6186 
6187   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6188     OMPBuilder.createTaskwait(CGF.Builder);
6189   } else {
6190     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6191     // global_tid);
6192     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6193     // Ignore return result until untied tasks are supported.
6194     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6195                             CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6196                         Args);
6197   }
6198 
6199   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6200     Region->emitUntiedSwitch(CGF);
6201 }
6202 
6203 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6204                                            OpenMPDirectiveKind InnerKind,
6205                                            const RegionCodeGenTy &CodeGen,
6206                                            bool HasCancel) {
6207   if (!CGF.HaveInsertPoint())
6208     return;
6209   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6210   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6211 }
6212 
namespace {
/// Integer codes passed as the 'cncl_kind' argument of the
/// __kmpc_cancel / __kmpc_cancellationpoint runtime calls emitted in this
/// file. getCancellationKind() maps a directive kind onto one of these.
enum RTCancelKind {
  /// Initial value used by getCancellationKind(); it is always overwritten
  /// there before being returned.
  CancelNoreq = 0,
  /// Used for OMPD_parallel.
  CancelParallel = 1,
  /// Used for OMPD_for.
  CancelLoop = 2,
  /// Used for OMPD_sections.
  CancelSections = 3,
  /// Used for OMPD_taskgroup.
  CancelTaskgroup = 4
};
} // anonymous namespace
6222 
6223 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6224   RTCancelKind CancelKind = CancelNoreq;
6225   if (CancelRegion == OMPD_parallel)
6226     CancelKind = CancelParallel;
6227   else if (CancelRegion == OMPD_for)
6228     CancelKind = CancelLoop;
6229   else if (CancelRegion == OMPD_sections)
6230     CancelKind = CancelSections;
6231   else {
6232     assert(CancelRegion == OMPD_taskgroup);
6233     CancelKind = CancelTaskgroup;
6234   }
6235   return CancelKind;
6236 }
6237 
6238 void CGOpenMPRuntime::emitCancellationPointCall(
6239     CodeGenFunction &CGF, SourceLocation Loc,
6240     OpenMPDirectiveKind CancelRegion) {
6241   if (!CGF.HaveInsertPoint())
6242     return;
6243   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6244   // global_tid, kmp_int32 cncl_kind);
6245   if (auto *OMPRegionInfo =
6246           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6247     // For 'cancellation point taskgroup', the task region info may not have a
6248     // cancel. This may instead happen in another adjacent task.
6249     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6250       llvm::Value *Args[] = {
6251           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6252           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6253       // Ignore return result until untied tasks are supported.
6254       llvm::Value *Result = CGF.EmitRuntimeCall(
6255           OMPBuilder.getOrCreateRuntimeFunction(
6256               CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6257           Args);
6258       // if (__kmpc_cancellationpoint()) {
6259       //   exit from construct;
6260       // }
6261       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6262       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6263       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6264       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6265       CGF.EmitBlock(ExitBB);
6266       // exit from construct;
6267       CodeGenFunction::JumpDest CancelDest =
6268           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6269       CGF.EmitBranchThroughCleanup(CancelDest);
6270       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6271     }
6272   }
6273 }
6274 
6275 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6276                                      const Expr *IfCond,
6277                                      OpenMPDirectiveKind CancelRegion) {
6278   if (!CGF.HaveInsertPoint())
6279     return;
6280   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6281   // kmp_int32 cncl_kind);
6282   auto &M = CGM.getModule();
6283   if (auto *OMPRegionInfo =
6284           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6285     auto &&ThenGen = [this, &M, Loc, CancelRegion,
6286                       OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6287       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6288       llvm::Value *Args[] = {
6289           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6290           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6291       // Ignore return result until untied tasks are supported.
6292       llvm::Value *Result = CGF.EmitRuntimeCall(
6293           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6294       // if (__kmpc_cancel()) {
6295       //   exit from construct;
6296       // }
6297       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6298       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6299       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6300       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6301       CGF.EmitBlock(ExitBB);
6302       // exit from construct;
6303       CodeGenFunction::JumpDest CancelDest =
6304           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6305       CGF.EmitBranchThroughCleanup(CancelDest);
6306       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6307     };
6308     if (IfCond) {
6309       emitIfClause(CGF, IfCond, ThenGen,
6310                    [](CodeGenFunction &, PrePostActionTy &) {});
6311     } else {
6312       RegionCodeGenTy ThenRCG(ThenGen);
6313       ThenRCG(CGF);
6314     }
6315   }
6316 }
6317 
6318 namespace {
6319 /// Cleanup action for uses_allocators support.
6320 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6321   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6322 
6323 public:
6324   OMPUsesAllocatorsActionTy(
6325       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6326       : Allocators(Allocators) {}
6327   void Enter(CodeGenFunction &CGF) override {
6328     if (!CGF.HaveInsertPoint())
6329       return;
6330     for (const auto &AllocatorData : Allocators) {
6331       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6332           CGF, AllocatorData.first, AllocatorData.second);
6333     }
6334   }
6335   void Exit(CodeGenFunction &CGF) override {
6336     if (!CGF.HaveInsertPoint())
6337       return;
6338     for (const auto &AllocatorData : Allocators) {
6339       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6340                                                         AllocatorData.first);
6341     }
6342   }
6343 };
6344 } // namespace
6345 
6346 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6347     const OMPExecutableDirective &D, StringRef ParentName,
6348     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6349     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6350   assert(!ParentName.empty() && "Invalid target region parent name!");
6351   HasEmittedTargetRegion = true;
6352   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6353   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6354     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6355       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6356       if (!D.AllocatorTraits)
6357         continue;
6358       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6359     }
6360   }
6361   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6362   CodeGen.setAction(UsesAllocatorAction);
6363   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6364                                    IsOffloadEntry, CodeGen);
6365 }
6366 
/// Emits a call to __kmpc_init_allocator for the allocator named by
/// \p Allocator, using the traits array \p AllocatorTraits, and stores the
/// resulting handle into the allocator variable.
///
/// \param Allocator Expression that, after stripping parentheses and implicit
/// casts, must be a DeclRefExpr to a VarDecl (enforced by the casts below).
/// \param AllocatorTraits Expression whose type must be a constant array
/// (enforced by the cast below); its element count is passed to the runtime.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Number of traits == number of elements of the constant traits array.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  // Reinterpret the address of the traits array as the address of a 'void *'
  // and load that value as the traits argument; base and TBAA info of the
  // original lvalue are kept.
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  // Argument order: thread id, memspace handle, number of traits, traits.
  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  // The allocator variable itself is emitted here, before it is assigned.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  // Convert the 'void *' returned by the runtime to the allocator's type.
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6401 
6402 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6403                                              const Expr *Allocator) {
6404   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6405   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6406   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6407   llvm::Value *AllocatorVal =
6408       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6409   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6410                                           CGF.getContext().VoidPtrTy,
6411                                           Allocator->getExprLoc());
6412   (void)CGF.EmitRuntimeCall(
6413       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6414                                             OMPRTL___kmpc_destroy_allocator),
6415       {ThreadId, AllocatorVal});
6416 }
6417 
/// Outlines the 'target' captured statement of \p D into a function with a
/// unique entry name and, when \p IsOffloadEntry is set, creates the region ID
/// and registers the entry with the offload entries manager.
///
/// \param ParentName Mangled name of the enclosing function; it becomes part of
/// the entry name.
/// \param OutlinedFn [out] The generated outlined function.
/// \param OutlinedFnID [out] The region ID; only written when
/// \p IsOffloadEntry is true.
/// \param CodeGen Code generation callback for the region body.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    // Device and file IDs are printed in hexadecimal, the line in decimal.
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // A separate CodeGenFunction is used for the outlined function; the captured
  // statement info carries the region callback and the entry name.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. When
  // emitting code for the device we also give the outlined function weak
  // linkage and clear its dso_local flag, because these functions will be
  // entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    // On AMDGCN the entry additionally gets the kernel calling convention.
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // Host side: the ID is a weak, constant, zero-initialized i8 global whose
    // name is built from the entry name and "region_id".
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
6486 
6487 /// Checks if the expression is constant or does not have non-trivial function
6488 /// calls.
6489 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6490   // We can skip constant expressions.
6491   // We can skip expressions with trivial calls or simple expressions.
6492   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6493           !E->hasNonTrivialCall(Ctx)) &&
6494          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6495 }
6496 
/// Looks through (possibly nested) compound statements starting at \p Body for
/// the single statement that cannot be ignored.
///
/// Ignorable statements are trivial expressions (see isTrivial), asm and null
/// statements, 'flush' / 'barrier' / 'taskyield' directives, and declaration
/// statements that only contain ignorable declarations.
///
/// \returns \p Body with containers stripped if that is not a compound
/// statement; otherwise the only non-ignorable child found (searched
/// recursively while it is itself a compound statement). Returns nullptr when
/// a compound statement has more than one non-ignorable child or none at all.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  // Keep descending as long as the current candidate is a compound statement.
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    // Reset; a non-null Child below means "one candidate already found".
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // The declaration statement is ignorable only if every declaration in
        // it is.
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              // Declarations of these kinds are always ignorable.
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              // Anything else must be a variable to be ignorable.
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              // A variable is ignorable when it is constexpr, or when it has a
              // trivial or reference type and either no initializer or a
              // trivial one.
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Strip containers from the candidate before checking whether it is
    // another compound statement.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6541 
/// Emit the number of teams for a target directive.  Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
///
/// Summary of the values produced, all as 32-bit integers:
///  - the value of the num_teams expression, sign-converted to i32, when a
///    num_teams clause is found;
///  - constant 0 for a teams construct without num_teams, and for a 'target'
///    whose single nested directive is neither teams, parallel nor simd;
///  - constant 1 for parallel/simd forms without teams;
///  - nullptr for a plain 'target' whose body has no single nested OpenMP
///    directive.
/// Must only be called when compiling for the host and with a target
/// execution directive (both asserted); any other directive kind reaches
/// llvm_unreachable.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': look for a single directive nested in the region body.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // The num_teams expression of the nested directive is emitted with
          // a captured-statement info built from the target's captured
          // statement installed for the duration of the emission.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Nested teams without num_teams.
        return Bld.getInt32(0);
      }
      // Nested parallel or simd directive: a single team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      // Any other nested directive.
      return Bld.getInt32(0);
    }
    // No single nested OpenMP directive was found.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target teams forms: the clause, if any, is on D itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      // Cleanups created while emitting the expression are run when this
      // scope is left.
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // No teams construct involved: a single team.
    return Bld.getInt32(1);
  // None of the remaining kinds is handled here; they all fall through to the
  // llvm_unreachable below.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6674 
6675 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6676                                   llvm::Value *DefaultThreadLimitVal) {
6677   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6678       CGF.getContext(), CS->getCapturedStmt());
6679   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6680     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6681       llvm::Value *NumThreads = nullptr;
6682       llvm::Value *CondVal = nullptr;
6683       // Handle if clause. If if clause present, the number of threads is
6684       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6685       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6686         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6687         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6688         const OMPIfClause *IfClause = nullptr;
6689         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6690           if (C->getNameModifier() == OMPD_unknown ||
6691               C->getNameModifier() == OMPD_parallel) {
6692             IfClause = C;
6693             break;
6694           }
6695         }
6696         if (IfClause) {
6697           const Expr *Cond = IfClause->getCondition();
6698           bool Result;
6699           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6700             if (!Result)
6701               return CGF.Builder.getInt32(1);
6702           } else {
6703             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6704             if (const auto *PreInit =
6705                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6706               for (const auto *I : PreInit->decls()) {
6707                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6708                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6709                 } else {
6710                   CodeGenFunction::AutoVarEmission Emission =
6711                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6712                   CGF.EmitAutoVarCleanups(Emission);
6713                 }
6714               }
6715             }
6716             CondVal = CGF.EvaluateExprAsBool(Cond);
6717           }
6718         }
6719       }
6720       // Check the value of num_threads clause iff if clause was not specified
6721       // or is not evaluated to false.
6722       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6723         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6724         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6725         const auto *NumThreadsClause =
6726             Dir->getSingleClause<OMPNumThreadsClause>();
6727         CodeGenFunction::LexicalScope Scope(
6728             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6729         if (const auto *PreInit =
6730                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6731           for (const auto *I : PreInit->decls()) {
6732             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6733               CGF.EmitVarDecl(cast<VarDecl>(*I));
6734             } else {
6735               CodeGenFunction::AutoVarEmission Emission =
6736                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6737               CGF.EmitAutoVarCleanups(Emission);
6738             }
6739           }
6740         }
6741         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6742         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6743                                                /*isSigned=*/false);
6744         if (DefaultThreadLimitVal)
6745           NumThreads = CGF.Builder.CreateSelect(
6746               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6747               DefaultThreadLimitVal, NumThreads);
6748       } else {
6749         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6750                                            : CGF.Builder.getInt32(0);
6751       }
6752       // Process condition of the if clause.
6753       if (CondVal) {
6754         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6755                                               CGF.Builder.getInt32(1));
6756       }
6757       return NumThreads;
6758     }
6759     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6760       return CGF.Builder.getInt32(1);
6761     return DefaultThreadLimitVal;
6762   }
6763   return DefaultThreadLimitVal ? DefaultThreadLimitVal
6764                                : CGF.Builder.getInt32(0);
6765 }
6766 
6767 /// Emit the number of threads for a target directive.  Inspect the
6768 /// thread_limit clause associated with a teams construct combined or closely
6769 /// nested with the target directive.
6770 ///
6771 /// Emit the num_threads clause for directives such as 'target parallel' that
6772 /// have no associated teams construct.
6773 ///
6774 /// Otherwise, return nullptr.
6775 static llvm::Value *
6776 emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
6777                                  const OMPExecutableDirective &D) {
6778   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6779          "Clauses associated with the teams directive expected to be emitted "
6780          "only for the host!");
6781   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6782   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6783          "Expected target-based executable directive.");
6784   CGBuilderTy &Bld = CGF.Builder;
6785   llvm::Value *ThreadLimitVal = nullptr;
6786   llvm::Value *NumThreadsVal = nullptr;
6787   switch (DirectiveKind) {
6788   case OMPD_target: {
6789     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6790     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6791       return NumThreads;
6792     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6793         CGF.getContext(), CS->getCapturedStmt());
6794     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6795       if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
6796         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6797         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6798         const auto *ThreadLimitClause =
6799             Dir->getSingleClause<OMPThreadLimitClause>();
6800         CodeGenFunction::LexicalScope Scope(
6801             CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6802         if (const auto *PreInit =
6803                 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6804           for (const auto *I : PreInit->decls()) {
6805             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6806               CGF.EmitVarDecl(cast<VarDecl>(*I));
6807             } else {
6808               CodeGenFunction::AutoVarEmission Emission =
6809                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6810               CGF.EmitAutoVarCleanups(Emission);
6811             }
6812           }
6813         }
6814         llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6815             ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6816         ThreadLimitVal =
6817             Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6818       }
6819       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6820           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6821         CS = Dir->getInnermostCapturedStmt();
6822         const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6823             CGF.getContext(), CS->getCapturedStmt());
6824         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6825       }
6826       if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
6827           !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
6828         CS = Dir->getInnermostCapturedStmt();
6829         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6830           return NumThreads;
6831       }
6832       if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6833         return Bld.getInt32(1);
6834     }
6835     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6836   }
6837   case OMPD_target_teams: {
6838     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6839       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6840       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6841       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6842           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6843       ThreadLimitVal =
6844           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6845     }
6846     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6847     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6848       return NumThreads;
6849     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6850         CGF.getContext(), CS->getCapturedStmt());
6851     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6852       if (Dir->getDirectiveKind() == OMPD_distribute) {
6853         CS = Dir->getInnermostCapturedStmt();
6854         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6855           return NumThreads;
6856       }
6857     }
6858     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6859   }
6860   case OMPD_target_teams_distribute:
6861     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6862       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6863       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6864       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6865           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6866       ThreadLimitVal =
6867           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6868     }
6869     return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
6870   case OMPD_target_parallel:
6871   case OMPD_target_parallel_for:
6872   case OMPD_target_parallel_for_simd:
6873   case OMPD_target_teams_distribute_parallel_for:
6874   case OMPD_target_teams_distribute_parallel_for_simd: {
6875     llvm::Value *CondVal = nullptr;
6876     // Handle if clause. If if clause present, the number of threads is
6877     // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6878     if (D.hasClausesOfKind<OMPIfClause>()) {
6879       const OMPIfClause *IfClause = nullptr;
6880       for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6881         if (C->getNameModifier() == OMPD_unknown ||
6882             C->getNameModifier() == OMPD_parallel) {
6883           IfClause = C;
6884           break;
6885         }
6886       }
6887       if (IfClause) {
6888         const Expr *Cond = IfClause->getCondition();
6889         bool Result;
6890         if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6891           if (!Result)
6892             return Bld.getInt32(1);
6893         } else {
6894           CodeGenFunction::RunCleanupsScope Scope(CGF);
6895           CondVal = CGF.EvaluateExprAsBool(Cond);
6896         }
6897       }
6898     }
6899     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6900       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6901       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6902       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6903           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6904       ThreadLimitVal =
6905           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6906     }
6907     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6908       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6909       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6910       llvm::Value *NumThreads = CGF.EmitScalarExpr(
6911           NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
6912       NumThreadsVal =
6913           Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
6914       ThreadLimitVal = ThreadLimitVal
6915                            ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
6916                                                                 ThreadLimitVal),
6917                                               NumThreadsVal, ThreadLimitVal)
6918                            : NumThreadsVal;
6919     }
6920     if (!ThreadLimitVal)
6921       ThreadLimitVal = Bld.getInt32(0);
6922     if (CondVal)
6923       return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
6924     return ThreadLimitVal;
6925   }
6926   case OMPD_target_teams_distribute_simd:
6927   case OMPD_target_simd:
6928     return Bld.getInt32(1);
6929   case OMPD_parallel:
6930   case OMPD_for:
6931   case OMPD_parallel_for:
6932   case OMPD_parallel_master:
6933   case OMPD_parallel_sections:
6934   case OMPD_for_simd:
6935   case OMPD_parallel_for_simd:
6936   case OMPD_cancel:
6937   case OMPD_cancellation_point:
6938   case OMPD_ordered:
6939   case OMPD_threadprivate:
6940   case OMPD_allocate:
6941   case OMPD_task:
6942   case OMPD_simd:
6943   case OMPD_sections:
6944   case OMPD_section:
6945   case OMPD_single:
6946   case OMPD_master:
6947   case OMPD_critical:
6948   case OMPD_taskyield:
6949   case OMPD_barrier:
6950   case OMPD_taskwait:
6951   case OMPD_taskgroup:
6952   case OMPD_atomic:
6953   case OMPD_flush:
6954   case OMPD_depobj:
6955   case OMPD_scan:
6956   case OMPD_teams:
6957   case OMPD_target_data:
6958   case OMPD_target_exit_data:
6959   case OMPD_target_enter_data:
6960   case OMPD_distribute:
6961   case OMPD_distribute_simd:
6962   case OMPD_distribute_parallel_for:
6963   case OMPD_distribute_parallel_for_simd:
6964   case OMPD_teams_distribute:
6965   case OMPD_teams_distribute_simd:
6966   case OMPD_teams_distribute_parallel_for:
6967   case OMPD_teams_distribute_parallel_for_simd:
6968   case OMPD_target_update:
6969   case OMPD_declare_simd:
6970   case OMPD_declare_variant:
6971   case OMPD_begin_declare_variant:
6972   case OMPD_end_declare_variant:
6973   case OMPD_declare_target:
6974   case OMPD_end_declare_target:
6975   case OMPD_declare_reduction:
6976   case OMPD_declare_mapper:
6977   case OMPD_taskloop:
6978   case OMPD_taskloop_simd:
6979   case OMPD_master_taskloop:
6980   case OMPD_master_taskloop_simd:
6981   case OMPD_parallel_master_taskloop:
6982   case OMPD_parallel_master_taskloop_simd:
6983   case OMPD_requires:
6984   case OMPD_unknown:
6985     break;
6986   default:
6987     break;
6988   }
6989   llvm_unreachable("Unsupported directive kind.");
6990 }
6991 
6992 namespace {
6993 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6994 
6995 // Utility to handle information from clauses associated with a given
6996 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6997 // It provides a convenient interface to obtain the information and generate
6998 // code for that information.
6999 class MappableExprsHandler {
7000 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. The enumerators are combined with bitwise operators; the
  /// underlying type is a 64-bit unsigned integer.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have
    /// the use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    // Note: 0x800 is intentionally unused; it is reserved for compatibility
    // with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in a target update directive.
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    // Declares OMP_MAP_MEMBER_OF as the largest enumerator for LLVM's
    // bitmask-enum support.
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7048 
7049   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7050   static unsigned getFlagMemberOffset() {
7051     unsigned Offset = 0;
7052     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7053          Remain = Remain >> 1)
7054       Offset++;
7055     return Offset;
7056   }
7057 
7058   /// Class that holds debugging information for a data mapping to be passed to
7059   /// the runtime library.
7060   class MappingExprInfo {
7061     /// The variable declaration used for the data mapping.
7062     const ValueDecl *MapDecl = nullptr;
7063     /// The original expression used in the map clause, or null if there is
7064     /// none.
7065     const Expr *MapExpr = nullptr;
7066 
7067   public:
7068     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7069         : MapDecl(MapDecl), MapExpr(MapExpr) {}
7070 
7071     const ValueDecl *getMapDecl() const { return MapDecl; }
7072     const Expr *getMapExpr() const { return MapExpr; }
7073   };
7074 
7075   /// Class that associates information with a base pointer to be passed to the
7076   /// runtime library.
7077   class BasePointerInfo {
7078     /// The base pointer.
7079     llvm::Value *Ptr = nullptr;
7080     /// The base declaration that refers to this device pointer, or null if
7081     /// there is none.
7082     const ValueDecl *DevPtrDecl = nullptr;
7083 
7084   public:
7085     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7086         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7087     llvm::Value *operator*() const { return Ptr; }
7088     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7089     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7090   };
7091 
  // Array types used for the pieces of mapping information collected below.
  // All of them use an inline capacity of four elements.

  /// Array of MappingExprInfo debug records.
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  /// Array of base pointers (BasePointerInfo).
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  /// Array of plain llvm::Value pointers.
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  /// Array of mapping flag sets.
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  /// Array of mapper declarations.
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  /// Array of 64-bit unsigned dimension values.
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  /// Array whose elements are themselves arrays of values.
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7099 
7100   /// This structure contains combined information generated for mappable
7101   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7102   /// mappers, and non-contiguous information.
7103   struct MapCombinedInfoTy {
7104     struct StructNonContiguousInfo {
7105       bool IsNonContiguous = false;
7106       MapDimArrayTy Dims;
7107       MapNonContiguousArrayTy Offsets;
7108       MapNonContiguousArrayTy Counts;
7109       MapNonContiguousArrayTy Strides;
7110     };
7111     MapExprsArrayTy Exprs;
7112     MapBaseValuesArrayTy BasePointers;
7113     MapValuesArrayTy Pointers;
7114     MapValuesArrayTy Sizes;
7115     MapFlagsArrayTy Types;
7116     MapMappersArrayTy Mappers;
7117     StructNonContiguousInfo NonContigInfo;
7118 
7119     /// Append arrays in \a CurInfo.
7120     void append(MapCombinedInfoTy &CurInfo) {
7121       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7122       BasePointers.append(CurInfo.BasePointers.begin(),
7123                           CurInfo.BasePointers.end());
7124       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7125       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7126       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7127       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7128       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7129                                  CurInfo.NonContigInfo.Dims.end());
7130       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7131                                     CurInfo.NonContigInfo.Offsets.end());
7132       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7133                                    CurInfo.NonContigInfo.Counts.end());
7134       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7135                                     CurInfo.NonContigInfo.Strides.end());
7136     }
7137   };
7138 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Lowest mapped element as (field index, address). Starts out as field
    /// index 0 with an invalid address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest mapped element as (field index, address). Starts out as field
    /// index 0 with an invalid address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Base address; invalid until one is recorded.
    Address Base = Address::invalid();
    /// False by default. NOTE(review): presumably set when the struct is
    /// reached through an array section -- confirm against the code that
    /// fills this structure.
    bool IsArraySection = false;
  };
7151 
7152 private:
7153   /// Kind that defines how a device pointer has to be returned.
7154   struct MapInfo {
7155     OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
7156     OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
7157     ArrayRef<OpenMPMapModifierKind> MapModifiers;
7158     ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
7159     bool ReturnDevicePointer = false;
7160     bool IsImplicit = false;
7161     const ValueDecl *Mapper = nullptr;
7162     const Expr *VarRef = nullptr;
7163     bool ForDeviceAddr = false;
7164 
7165     MapInfo() = default;
7166     MapInfo(
7167         OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7168         OpenMPMapClauseKind MapType,
7169         ArrayRef<OpenMPMapModifierKind> MapModifiers,
7170         ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7171         bool ReturnDevicePointer, bool IsImplicit,
7172         const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
7173         bool ForDeviceAddr = false)
7174         : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
7175           MotionModifiers(MotionModifiers),
7176           ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
7177           Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
7178   };
7179 
7180   /// If use_device_ptr or use_device_addr is used on a decl which is a struct
7181   /// member and there is no map information about it, then emission of that
7182   /// entry is deferred until the whole struct has been processed.
7183   struct DeferredDevicePtrEntryTy {
7184     const Expr *IE = nullptr;
7185     const ValueDecl *VD = nullptr;
7186     bool ForDeviceAddr = false;
7187 
7188     DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
7189                              bool ForDeviceAddr)
7190         : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
7191   };
7192 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for. Held by reference, so it
  /// has to outlive this handler.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
7213 
7214   llvm::Value *getExprTypeSize(const Expr *E) const {
7215     QualType ExprTy = E->getType().getCanonicalType();
7216 
7217     // Calculate the size for array shaping expression.
7218     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7219       llvm::Value *Size =
7220           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7221       for (const Expr *SE : OAE->getDimensions()) {
7222         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7223         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7224                                       CGF.getContext().getSizeType(),
7225                                       SE->getExprLoc());
7226         Size = CGF.Builder.CreateNUWMul(Size, Sz);
7227       }
7228       return Size;
7229     }
7230 
7231     // Reference types are ignored for mapping purposes.
7232     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7233       ExprTy = RefTy->getPointeeType().getCanonicalType();
7234 
7235     // Given that an array section is considered a built-in type, we need to
7236     // do the calculation based on the length of the section instead of relying
7237     // on CGF.getTypeSize(E->getType()).
7238     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7239       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7240                             OAE->getBase()->IgnoreParenImpCasts())
7241                             .getCanonicalType();
7242 
7243       // If there is no length associated with the expression and lower bound is
7244       // not specified too, that means we are using the whole length of the
7245       // base.
7246       if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7247           !OAE->getLowerBound())
7248         return CGF.getTypeSize(BaseTy);
7249 
7250       llvm::Value *ElemSize;
7251       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7252         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7253       } else {
7254         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7255         assert(ATy && "Expecting array type if not a pointer type.");
7256         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7257       }
7258 
7259       // If we don't have a length at this point, that is because we have an
7260       // array section with a single element.
7261       if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7262         return ElemSize;
7263 
7264       if (const Expr *LenExpr = OAE->getLength()) {
7265         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7266         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7267                                              CGF.getContext().getSizeType(),
7268                                              LenExpr->getExprLoc());
7269         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7270       }
7271       assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7272              OAE->getLowerBound() && "expected array_section[lb:].");
7273       // Size = sizetype - lb * elemtype;
7274       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7275       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7276       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7277                                        CGF.getContext().getSizeType(),
7278                                        OAE->getLowerBound()->getExprLoc());
7279       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7280       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7281       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7282       LengthVal = CGF.Builder.CreateSelect(
7283           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7284       return LengthVal;
7285     }
7286     return CGF.getTypeSize(ExprTy);
7287   }
7288 
7289   /// Return the corresponding bits for a given map clause modifier. Add
7290   /// a flag marking the map as a pointer if requested. Add a flag marking the
7291   /// map as the first one of a series of maps that relate to the same map
7292   /// expression.
7293   OpenMPOffloadMappingFlags getMapTypeBits(
7294       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7295       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7296       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7297     OpenMPOffloadMappingFlags Bits =
7298         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7299     switch (MapType) {
7300     case OMPC_MAP_alloc:
7301     case OMPC_MAP_release:
7302       // alloc and release is the default behavior in the runtime library,  i.e.
7303       // if we don't pass any bits alloc/release that is what the runtime is
7304       // going to do. Therefore, we don't need to signal anything for these two
7305       // type modifiers.
7306       break;
7307     case OMPC_MAP_to:
7308       Bits |= OMP_MAP_TO;
7309       break;
7310     case OMPC_MAP_from:
7311       Bits |= OMP_MAP_FROM;
7312       break;
7313     case OMPC_MAP_tofrom:
7314       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7315       break;
7316     case OMPC_MAP_delete:
7317       Bits |= OMP_MAP_DELETE;
7318       break;
7319     case OMPC_MAP_unknown:
7320       llvm_unreachable("Unexpected map type!");
7321     }
7322     if (AddPtrFlag)
7323       Bits |= OMP_MAP_PTR_AND_OBJ;
7324     if (AddIsTargetParamFlag)
7325       Bits |= OMP_MAP_TARGET_PARAM;
7326     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7327         != MapModifiers.end())
7328       Bits |= OMP_MAP_ALWAYS;
7329     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7330         != MapModifiers.end())
7331       Bits |= OMP_MAP_CLOSE;
7332     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present)
7333         != MapModifiers.end())
7334       Bits |= OMP_MAP_PRESENT;
7335     if (llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present)
7336         != MotionModifiers.end())
7337       Bits |= OMP_MAP_PRESENT;
7338     if (IsNonContiguous)
7339       Bits |= OMP_MAP_NON_CONTIG;
7340     return Bits;
7341   }
7342 
7343   /// Return true if the provided expression is a final array section. A
7344   /// final array section, is one whose length can't be proved to be one.
7345   bool isFinalArraySectionExpression(const Expr *E) const {
7346     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7347 
7348     // It is not an array section and therefore not a unity-size one.
7349     if (!OASE)
7350       return false;
7351 
7352     // An array section with no colon always refer to a single element.
7353     if (OASE->getColonLocFirst().isInvalid())
7354       return false;
7355 
7356     const Expr *Length = OASE->getLength();
7357 
7358     // If we don't have a length we have to check if the array has size 1
7359     // for this dimension. Also, we should always expect a length if the
7360     // base type is pointer.
7361     if (!Length) {
7362       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7363                              OASE->getBase()->IgnoreParenImpCasts())
7364                              .getCanonicalType();
7365       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7366         return ATy->getSize().getSExtValue() != 1;
7367       // If we don't have a constant dimension length, we have to consider
7368       // the current section as having any size, so it is not necessarily
7369       // unitary. If it happen to be unity size, that's user fault.
7370       return true;
7371     }
7372 
7373     // Check if the length evaluates to 1.
7374     Expr::EvalResult Result;
7375     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7376       return true; // Can have more that size 1.
7377 
7378     llvm::APSInt ConstLength = Result.Val.getInt();
7379     return ConstLength.getSExtValue() != 1;
7380   }
7381 
7382   /// Generate the base pointers, section pointers, sizes, map type bits, and
7383   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7384   /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
7387   void generateInfoForComponentList(
7388       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7389       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7390       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7391       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7392       bool IsFirstComponentList, bool IsImplicit,
7393       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7394       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7395       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7396           OverlappedElements = llvm::None) const {
7397     // The following summarizes what has to be generated for each map and the
7398     // types below. The generated information is expressed in this order:
7399     // base pointer, section pointer, size, flags
7400     // (to add to the ones that come from the map type and modifier).
7401     //
7402     // double d;
7403     // int i[100];
7404     // float *p;
7405     //
7406     // struct S1 {
7407     //   int i;
7408     //   float f[50];
7409     // }
7410     // struct S2 {
7411     //   int i;
7412     //   float f[50];
7413     //   S1 s;
7414     //   double *p;
7415     //   struct S2 *ps;
7416     // }
7417     // S2 s;
7418     // S2 *ps;
7419     //
7420     // map(d)
7421     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7422     //
7423     // map(i)
7424     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7425     //
7426     // map(i[1:23])
7427     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7428     //
7429     // map(p)
7430     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7431     //
7432     // map(p[1:24])
7433     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7434     // in unified shared memory mode or for local pointers
7435     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7436     //
7437     // map(s)
7438     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7439     //
7440     // map(s.i)
7441     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7442     //
7443     // map(s.s.f)
7444     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7445     //
7446     // map(s.p)
7447     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7448     //
7449     // map(to: s.p[:22])
7450     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7451     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7452     // &(s.p), &(s.p[0]), 22*sizeof(double),
7453     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7454     // (*) alloc space for struct members, only this is a target parameter
7455     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7456     //      optimizes this entry out, same in the examples below)
7457     // (***) map the pointee (map: to)
7458     //
7459     // map(s.ps)
7460     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7461     //
7462     // map(from: s.ps->s.i)
7463     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7464     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7465     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7466     //
7467     // map(to: s.ps->ps)
7468     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7469     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7470     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7471     //
7472     // map(s.ps->ps->ps)
7473     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7474     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7475     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7476     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7477     //
7478     // map(to: s.ps->ps->s.f[:22])
7479     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7480     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7481     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7482     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7483     //
7484     // map(ps)
7485     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7486     //
7487     // map(ps->i)
7488     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7489     //
7490     // map(ps->s.f)
7491     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7492     //
7493     // map(from: ps->p)
7494     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7495     //
7496     // map(to: ps->p[:22])
7497     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7498     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7499     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7500     //
7501     // map(ps->ps)
7502     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7503     //
7504     // map(from: ps->ps->s.i)
7505     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7506     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7507     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7508     //
7509     // map(from: ps->ps->ps)
7510     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7511     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7512     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7513     //
7514     // map(ps->ps->ps->ps)
7515     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7516     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7517     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7518     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7519     //
7520     // map(to: ps->ps->ps->s.f[:22])
7521     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7522     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7523     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7524     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7525     //
7526     // map(to: s.f[:22]) map(from: s.p[:33])
7527     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
7529     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7530     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7531     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7532     // (*) allocate contiguous space needed to fit all mapped members even if
7533     //     we allocate space for members not mapped (in this example,
7534     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7535     //     them as well because they fall between &s.f[0] and &s.p)
7536     //
7537     // map(from: s.f[:22]) map(to: ps->p[:33])
7538     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7539     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7540     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7541     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7542     // (*) the struct this entry pertains to is the 2nd element in the list of
7543     //     arguments, hence MEMBER_OF(2)
7544     //
7545     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7546     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7547     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7548     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7549     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7550     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7551     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7552     // (*) the struct this entry pertains to is the 4th element in the list
7553     //     of arguments, hence MEMBER_OF(4)
7554 
7555     // Track if the map information being generated is the first for a capture.
7556     bool IsCaptureFirstInfo = IsFirstComponentList;
7557     // When the variable is on a declare target link or in a to clause with
7558     // unified memory, a reference is needed to hold the host/device address
7559     // of the variable.
7560     bool RequiresReference = false;
7561 
7562     // Scan the components from the base to the complete expression.
7563     auto CI = Components.rbegin();
7564     auto CE = Components.rend();
7565     auto I = CI;
7566 
7567     // Track if the map information being generated is the first for a list of
7568     // components.
7569     bool IsExpressionFirstInfo = true;
7570     bool FirstPointerInComplexData = false;
7571     Address BP = Address::invalid();
7572     const Expr *AssocExpr = I->getAssociatedExpression();
7573     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7574     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7575     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7576 
7577     if (isa<MemberExpr>(AssocExpr)) {
7578       // The base is the 'this' pointer. The content of the pointer is going
7579       // to be the base of the field being mapped.
7580       BP = CGF.LoadCXXThisAddress();
7581     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7582                (OASE &&
7583                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7584       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7585     } else if (OAShE &&
7586                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7587       BP = Address(
7588           CGF.EmitScalarExpr(OAShE->getBase()),
7589           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7590     } else {
7591       // The base is the reference to the variable.
7592       // BP = &Var.
7593       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7594       if (const auto *VD =
7595               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7596         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7597                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7598           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7599               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7600                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7601             RequiresReference = true;
7602             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7603           }
7604         }
7605       }
7606 
7607       // If the variable is a pointer and is being dereferenced (i.e. is not
7608       // the last component), the base has to be the pointer itself, not its
7609       // reference. References are ignored for mapping purposes.
7610       QualType Ty =
7611           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7612       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7613         // No need to generate individual map information for the pointer, it
7614         // can be associated with the combined storage if shared memory mode is
7615         // active or the base declaration is not global variable.
7616         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7617         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7618             !VD || VD->hasLocalStorage())
7619           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7620         else
7621           FirstPointerInComplexData = true;
7622         ++I;
7623       }
7624     }
7625 
7626     // Track whether a component of the list should be marked as MEMBER_OF some
7627     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7628     // in a component list should be marked as MEMBER_OF, all subsequent entries
7629     // do not belong to the base struct. E.g.
7630     // struct S2 s;
7631     // s.ps->ps->ps->f[:]
7632     //   (1) (2) (3) (4)
7633     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7634     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7635     // is the pointee of ps(2) which is not member of struct s, so it should not
7636     // be marked as such (it is still PTR_AND_OBJ).
7637     // The variable is initialized to false so that PTR_AND_OBJ entries which
7638     // are not struct members are not considered (e.g. array of pointers to
7639     // data).
7640     bool ShouldBeMemberOf = false;
7641 
7642     // Variable keeping track of whether or not we have encountered a component
7643     // in the component list which is a member expression. Useful when we have a
7644     // pointer or a final array section, in which case it is the previous
7645     // component in the list which tells us whether we have a member expression.
7646     // E.g. X.f[:]
7647     // While processing the final array section "[:]" it is "f" which tells us
7648     // whether we are dealing with a member of a declared struct.
7649     const MemberExpr *EncounteredME = nullptr;
7650 
    // Track the total number of dimensions. Start from one for the dummy
    // dimension.
7653     uint64_t DimSize = 1;
7654 
7655     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7656 
7657     for (; I != CE; ++I) {
7658       // If the current component is member of a struct (parent struct) mark it.
7659       if (!EncounteredME) {
7660         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7661         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7662         // as MEMBER_OF the parent struct.
7663         if (EncounteredME) {
7664           ShouldBeMemberOf = true;
7665           // Do not emit as complex pointer if this is actually not array-like
7666           // expression.
7667           if (FirstPointerInComplexData) {
7668             QualType Ty = std::prev(I)
7669                               ->getAssociatedDeclaration()
7670                               ->getType()
7671                               .getNonReferenceType();
7672             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7673             FirstPointerInComplexData = false;
7674           }
7675         }
7676       }
7677 
7678       auto Next = std::next(I);
7679 
7680       // We need to generate the addresses and sizes if this is the last
7681       // component, if the component is a pointer or if it is an array section
7682       // whose length can't be proved to be one. If this is a pointer, it
7683       // becomes the base address for the following components.
7684 
7685       // A final array section, is one whose length can't be proved to be one.
7686       // If the map item is non-contiguous then we don't treat any array section
7687       // as final array section.
7688       bool IsFinalArraySection =
7689           !IsNonContiguous &&
7690           isFinalArraySectionExpression(I->getAssociatedExpression());
7691 
7692       // If we have a declaration for the mapping use that, otherwise use
7693       // the base declaration of the map clause.
7694       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7695                                      ? I->getAssociatedDeclaration()
7696                                      : BaseDecl;
7697 
7698       // Get information on whether the element is a pointer. Have to do a
7699       // special treatment for array sections given that they are built-in
7700       // types.
7701       const auto *OASE =
7702           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7703       const auto *OAShE =
7704           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7705       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7706       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7707       bool IsPointer =
7708           OAShE ||
7709           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7710                        .getCanonicalType()
7711                        ->isAnyPointerType()) ||
7712           I->getAssociatedExpression()->getType()->isAnyPointerType();
7713       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
7714 
7715       if (OASE)
7716         ++DimSize;
7717 
7718       if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
7719         // If this is not the last component, we expect the pointer to be
7720         // associated with an array expression or member expression.
7721         assert((Next == CE ||
7722                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7723                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7724                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7725                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7726                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7727                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7728                "Unexpected expression");
7729 
7730         Address LB = Address::invalid();
7731         if (OAShE) {
7732           LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
7733                        CGF.getContext().getTypeAlignInChars(
7734                            OAShE->getBase()->getType()));
7735         } else {
7736           LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7737                    .getAddress(CGF);
7738         }
7739 
7740         // If this component is a pointer inside the base struct then we don't
7741         // need to create any entry for it - it will be combined with the object
7742         // it is pointing to into a single PTR_AND_OBJ entry.
7743         bool IsMemberPointerOrAddr =
7744             (IsPointer || ForDeviceAddr) && EncounteredME &&
7745             (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7746              EncounteredME);
7747         if (!OverlappedElements.empty()) {
7748           // Handle base element with the info for overlapped elements.
7749           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7750           assert(Next == CE &&
7751                  "Expected last element for the overlapped elements.");
7752           assert(!IsPointer &&
7753                  "Unexpected base element with the pointer type.");
7754           // Mark the whole struct as the struct that requires allocation on the
7755           // device.
7756           PartialStruct.LowestElem = {0, LB};
7757           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7758               I->getAssociatedExpression()->getType());
7759           Address HB = CGF.Builder.CreateConstGEP(
7760               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
7761                                                               CGF.VoidPtrTy),
7762               TypeSize.getQuantity() - 1);
7763           PartialStruct.HighestElem = {
7764               std::numeric_limits<decltype(
7765                   PartialStruct.HighestElem.first)>::max(),
7766               HB};
7767           PartialStruct.Base = BP;
7768           // Emit data for non-overlapped data.
7769           OpenMPOffloadMappingFlags Flags =
7770               OMP_MAP_MEMBER_OF |
7771               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7772                              /*AddPtrFlag=*/false,
7773                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7774           LB = BP;
7775           llvm::Value *Size = nullptr;
7776           // Do bitcopy of all non-overlapped structure elements.
7777           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7778                    Component : OverlappedElements) {
7779             Address ComponentLB = Address::invalid();
7780             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7781                  Component) {
7782               if (MC.getAssociatedDeclaration()) {
7783                 ComponentLB =
7784                     CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7785                         .getAddress(CGF);
7786                 Size = CGF.Builder.CreatePtrDiff(
7787                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7788                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7789                 break;
7790               }
7791             }
7792             assert(Size && "Failed to determine structure size");
7793             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7794             CombinedInfo.BasePointers.push_back(BP.getPointer());
7795             CombinedInfo.Pointers.push_back(LB.getPointer());
7796             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7797                 Size, CGF.Int64Ty, /*isSigned=*/true));
7798             CombinedInfo.Types.push_back(Flags);
7799             CombinedInfo.Mappers.push_back(nullptr);
7800             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7801                                                                       : 1);
7802             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7803           }
7804           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7805           CombinedInfo.BasePointers.push_back(BP.getPointer());
7806           CombinedInfo.Pointers.push_back(LB.getPointer());
7807           Size = CGF.Builder.CreatePtrDiff(
7808               CGF.EmitCastToVoidPtr(
7809                   CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7810               CGF.EmitCastToVoidPtr(LB.getPointer()));
7811           CombinedInfo.Sizes.push_back(
7812               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7813           CombinedInfo.Types.push_back(Flags);
7814           CombinedInfo.Mappers.push_back(nullptr);
7815           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7816                                                                     : 1);
7817           break;
7818         }
7819         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7820         if (!IsMemberPointerOrAddr ||
7821             (Next == CE && MapType != OMPC_MAP_unknown)) {
7822           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7823           CombinedInfo.BasePointers.push_back(BP.getPointer());
7824           CombinedInfo.Pointers.push_back(LB.getPointer());
7825           CombinedInfo.Sizes.push_back(
7826               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7827           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7828                                                                     : 1);
7829 
7830           // If Mapper is valid, the last component inherits the mapper.
7831           bool HasMapper = Mapper && Next == CE;
7832           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7833 
7834           // We need to add a pointer flag for each map that comes from the
7835           // same expression except for the first one. We also need to signal
7836           // this map is the first one that relates with the current capture
7837           // (there is a set of entries for each capture).
7838           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7839               MapType, MapModifiers, MotionModifiers, IsImplicit,
7840               !IsExpressionFirstInfo || RequiresReference ||
7841                   FirstPointerInComplexData,
7842               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
7843 
7844           if (!IsExpressionFirstInfo) {
7845             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7846             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7847             if (IsPointer)
7848               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7849                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
7850 
7851             if (ShouldBeMemberOf) {
7852               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7853               // should be later updated with the correct value of MEMBER_OF.
7854               Flags |= OMP_MAP_MEMBER_OF;
7855               // From now on, all subsequent PTR_AND_OBJ entries should not be
7856               // marked as MEMBER_OF.
7857               ShouldBeMemberOf = false;
7858             }
7859           }
7860 
7861           CombinedInfo.Types.push_back(Flags);
7862         }
7863 
7864         // If we have encountered a member expression so far, keep track of the
7865         // mapped member. If the parent is "*this", then the value declaration
7866         // is nullptr.
7867         if (EncounteredME) {
7868           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7869           unsigned FieldIndex = FD->getFieldIndex();
7870 
7871           // Update info about the lowest and highest elements for this struct
7872           if (!PartialStruct.Base.isValid()) {
7873             PartialStruct.LowestElem = {FieldIndex, LB};
7874             if (IsFinalArraySection) {
7875               Address HB =
7876                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7877                       .getAddress(CGF);
7878               PartialStruct.HighestElem = {FieldIndex, HB};
7879             } else {
7880               PartialStruct.HighestElem = {FieldIndex, LB};
7881             }
7882             PartialStruct.Base = BP;
7883           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7884             PartialStruct.LowestElem = {FieldIndex, LB};
7885           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7886             PartialStruct.HighestElem = {FieldIndex, LB};
7887           }
7888         }
7889 
7890         // Need to emit combined struct for array sections.
7891         if (IsFinalArraySection || IsNonContiguous)
7892           PartialStruct.IsArraySection = true;
7893 
7894         // If we have a final array section, we are done with this expression.
7895         if (IsFinalArraySection)
7896           break;
7897 
7898         // The pointer becomes the base for the next element.
7899         if (Next != CE)
7900           BP = LB;
7901 
7902         IsExpressionFirstInfo = false;
7903         IsCaptureFirstInfo = false;
7904         FirstPointerInComplexData = false;
7905       } else if (FirstPointerInComplexData) {
7906         QualType Ty = Components.rbegin()
7907                           ->getAssociatedDeclaration()
7908                           ->getType()
7909                           .getNonReferenceType();
7910         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7911         FirstPointerInComplexData = false;
7912       }
7913     }
7914 
7915     if (!IsNonContiguous)
7916       return;
7917 
7918     const ASTContext &Context = CGF.getContext();
7919 
7920     // For supporting stride in array section, we need to initialize the first
7921     // dimension size as 1, first offset as 0, and first count as 1
7922     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7923     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7924     MapValuesArrayTy CurStrides;
7925     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7926     uint64_t ElementTypeSize;
7927 
7928     // Collect Size information for each dimension and get the element size as
7929     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
7931     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7932          Components) {
7933       const Expr *AssocExpr = Component.getAssociatedExpression();
7934       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7935 
7936       if (!OASE)
7937         continue;
7938 
7939       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
7940       auto *CAT = Context.getAsConstantArrayType(Ty);
7941       auto *VAT = Context.getAsVariableArrayType(Ty);
7942 
7943       // We need all the dimension size except for the last dimension.
7944       assert((VAT || CAT || &Component == &*Components.begin()) &&
7945              "Should be either ConstantArray or VariableArray if not the "
7946              "first Component");
7947 
7948       // Get element size if CurStrides is empty.
7949       if (CurStrides.empty()) {
7950         const Type *ElementType = nullptr;
7951         if (CAT)
7952           ElementType = CAT->getElementType().getTypePtr();
7953         else if (VAT)
7954           ElementType = VAT->getElementType().getTypePtr();
7955         else
7956           assert(&Component == &*Components.begin() &&
7957                  "Only expect pointer (non CAT or VAT) when this is the "
7958                  "first Component");
7959         // If ElementType is null, then it means the base is a pointer
7960         // (neither CAT nor VAT) and we'll attempt to get ElementType again
7961         // for next iteration.
7962         if (ElementType) {
7963           // For the case that having pointer as base, we need to remove one
7964           // level of indirection.
7965           if (&Component != &*Components.begin())
7966             ElementType = ElementType->getPointeeOrArrayElementType();
7967           ElementTypeSize =
7968               Context.getTypeSizeInChars(ElementType).getQuantity();
7969           CurStrides.push_back(
7970               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7971         }
7972       }
7973       // Get dimension value except for the last dimension since we don't need
7974       // it.
7975       if (DimSizes.size() < Components.size() - 1) {
7976         if (CAT)
7977           DimSizes.push_back(llvm::ConstantInt::get(
7978               CGF.Int64Ty, CAT->getSize().getZExtValue()));
7979         else if (VAT)
7980           DimSizes.push_back(CGF.Builder.CreateIntCast(
7981               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7982               /*IsSigned=*/false));
7983       }
7984     }
7985 
    // Skip the dummy dimension since we already have its information.
7987     auto DI = DimSizes.begin() + 1;
7988     // Product of dimension.
7989     llvm::Value *DimProd =
7990         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
7991 
7992     // Collect info for non-contiguous. Notice that offset, count, and stride
7993     // are only meaningful for array-section, so we insert a null for anything
7994     // other than array-section.
7995     // Also, the size of offset, count, and stride are not the same as
7996     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
7997     // count, and stride are the same as the number of non-contiguous
7998     // declaration in target update to/from clause.
7999     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8000          Components) {
8001       const Expr *AssocExpr = Component.getAssociatedExpression();
8002 
8003       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8004         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8005             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8006             /*isSigned=*/false);
8007         CurOffsets.push_back(Offset);
8008         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8009         CurStrides.push_back(CurStrides.back());
8010         continue;
8011       }
8012 
8013       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8014 
8015       if (!OASE)
8016         continue;
8017 
8018       // Offset
8019       const Expr *OffsetExpr = OASE->getLowerBound();
8020       llvm::Value *Offset = nullptr;
8021       if (!OffsetExpr) {
8022         // If offset is absent, then we just set it to zero.
8023         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8024       } else {
8025         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8026                                            CGF.Int64Ty,
8027                                            /*isSigned=*/false);
8028       }
8029       CurOffsets.push_back(Offset);
8030 
8031       // Count
8032       const Expr *CountExpr = OASE->getLength();
8033       llvm::Value *Count = nullptr;
8034       if (!CountExpr) {
        // In Clang, once a higher dimension is an array section, all the
        // lower dimensions are constructed as array sections as well.
        // However, for a case like arr[0:2][2], Clang constructs the inner
        // dimension as an array section even though, according to the spec,
        // it is not in array-section form.
8039         if (!OASE->getColonLocFirst().isValid() &&
8040             !OASE->getColonLocSecond().isValid()) {
8041           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8042         } else {
8043           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8044           // When the length is absent it defaults to ⌈(size −
8045           // lower-bound)/stride⌉, where size is the size of the array
8046           // dimension.
8047           const Expr *StrideExpr = OASE->getStride();
8048           llvm::Value *Stride =
8049               StrideExpr
8050                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8051                                               CGF.Int64Ty, /*isSigned=*/false)
8052                   : nullptr;
8053           if (Stride)
8054             Count = CGF.Builder.CreateUDiv(
8055                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8056           else
8057             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8058         }
8059       } else {
8060         Count = CGF.EmitScalarExpr(CountExpr);
8061       }
8062       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8063       CurCounts.push_back(Count);
8064 
8065       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8066       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8067       //              Offset      Count     Stride
8068       //    D0          0           1         4    (int)    <- dummy dimension
8069       //    D1          0           2         8    (2 * (1) * 4)
8070       //    D2          1           2         20   (1 * (1 * 5) * 4)
      //    D3          0           2         200  (2 * (1 * 5 * 5) * 4)
8072       const Expr *StrideExpr = OASE->getStride();
8073       llvm::Value *Stride =
8074           StrideExpr
8075               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8076                                           CGF.Int64Ty, /*isSigned=*/false)
8077               : nullptr;
8078       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8079       if (Stride)
8080         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8081       else
8082         CurStrides.push_back(DimProd);
8083       if (DI != DimSizes.end())
8084         ++DI;
8085     }
8086 
8087     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8088     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8089     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8090   }
8091 
8092   /// Return the adjusted map modifiers if the declaration a capture refers to
8093   /// appears in a first-private clause. This is expected to be used only with
8094   /// directives that start with 'target'.
8095   MappableExprsHandler::OpenMPOffloadMappingFlags
8096   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8097     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8098 
8099     // A first private variable captured by reference will use only the
8100     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8101     // declaration is known as first-private in this handler.
8102     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8103       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
8104           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
8105         return MappableExprsHandler::OMP_MAP_ALWAYS |
8106                MappableExprsHandler::OMP_MAP_TO;
8107       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8108         return MappableExprsHandler::OMP_MAP_TO |
8109                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8110       return MappableExprsHandler::OMP_MAP_PRIVATE |
8111              MappableExprsHandler::OMP_MAP_TO;
8112     }
8113     return MappableExprsHandler::OMP_MAP_TO |
8114            MappableExprsHandler::OMP_MAP_FROM;
8115   }
8116 
8117   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8118     // Rotate by getFlagMemberOffset() bits.
8119     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8120                                                   << getFlagMemberOffset());
8121   }
8122 
8123   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8124                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8125     // If the entry is PTR_AND_OBJ but has not been marked with the special
8126     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8127     // marked as MEMBER_OF.
8128     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8129         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8130       return;
8131 
8132     // Reset the placeholder value to prepare the flag for the assignment of the
8133     // proper MEMBER_OF value.
8134     Flags &= ~OMP_MAP_MEMBER_OF;
8135     Flags |= MemberOfFlag;
8136   }
8137 
8138   void getPlainLayout(const CXXRecordDecl *RD,
8139                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8140                       bool AsBase) const {
8141     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8142 
8143     llvm::StructType *St =
8144         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8145 
8146     unsigned NumElements = St->getNumElements();
8147     llvm::SmallVector<
8148         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8149         RecordLayout(NumElements);
8150 
8151     // Fill bases.
8152     for (const auto &I : RD->bases()) {
8153       if (I.isVirtual())
8154         continue;
8155       const auto *Base = I.getType()->getAsCXXRecordDecl();
8156       // Ignore empty bases.
8157       if (Base->isEmpty() || CGF.getContext()
8158                                  .getASTRecordLayout(Base)
8159                                  .getNonVirtualSize()
8160                                  .isZero())
8161         continue;
8162 
8163       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8164       RecordLayout[FieldIndex] = Base;
8165     }
8166     // Fill in virtual bases.
8167     for (const auto &I : RD->vbases()) {
8168       const auto *Base = I.getType()->getAsCXXRecordDecl();
8169       // Ignore empty bases.
8170       if (Base->isEmpty())
8171         continue;
8172       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8173       if (RecordLayout[FieldIndex])
8174         continue;
8175       RecordLayout[FieldIndex] = Base;
8176     }
8177     // Fill in all the fields.
8178     assert(!RD->isUnion() && "Unexpected union.");
8179     for (const auto *Field : RD->fields()) {
8180       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8181       // will fill in later.)
8182       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8183         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8184         RecordLayout[FieldIndex] = Field;
8185       }
8186     }
8187     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8188              &Data : RecordLayout) {
8189       if (Data.isNull())
8190         continue;
8191       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8192         getPlainLayout(Base, Layout, /*AsBase=*/true);
8193       else
8194         Layout.push_back(Data.get<const FieldDecl *>());
8195     }
8196   }
8197 
8198 public:
8199   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8200       : CurDir(&Dir), CGF(CGF) {
8201     // Extract firstprivate clause information.
8202     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8203       for (const auto *D : C->varlists())
8204         FirstPrivateDecls.try_emplace(
8205             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8206     // Extract implicit firstprivates from uses_allocators clauses.
8207     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8208       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8209         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8210         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8211           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8212                                         /*Implicit=*/true);
8213         else if (const auto *VD = dyn_cast<VarDecl>(
8214                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8215                          ->getDecl()))
8216           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8217       }
8218     }
8219     // Extract device pointer clause information.
8220     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8221       for (auto L : C->component_lists())
8222         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8223   }
8224 
  /// Constructor for the declare mapper directive. It only initializes the
  /// CurDir and CGF members from \p Dir and \p CGF; unlike the constructor
  /// taking an executable directive, no clause information is extracted.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8228 
8229   /// Generate code for the combined entry if we have a partially mapped struct
8230   /// and take care of the mapping flags of the arguments corresponding to
8231   /// individual struct members.
8232   void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8233                          MapFlagsArrayTy &CurTypes,
8234                          const StructRangeInfoTy &PartialStruct,
8235                          const ValueDecl *VD = nullptr,
8236                          bool NotTargetParams = true) const {
8237     if (CurTypes.size() == 1 &&
8238         ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
8239         !PartialStruct.IsArraySection)
8240       return;
8241     CombinedInfo.Exprs.push_back(VD);
8242     // Base is the base of the struct
8243     CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8244     // Pointer is the address of the lowest element
8245     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
8246     CombinedInfo.Pointers.push_back(LB);
8247     // There should not be a mapper for a combined entry.
8248     CombinedInfo.Mappers.push_back(nullptr);
8249     // Size is (addr of {highest+1} element) - (addr of lowest element)
8250     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
8251     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
8252     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8253     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8254     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
8255     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8256                                                   /*isSigned=*/false);
8257     CombinedInfo.Sizes.push_back(Size);
8258     // Map type is always TARGET_PARAM, if generate info for captures.
8259     CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
8260                                                  : OMP_MAP_TARGET_PARAM);
8261     // If any element has the present modifier, then make sure the runtime
8262     // doesn't attempt to allocate the struct.
8263     if (CurTypes.end() !=
8264         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8265           return Type & OMP_MAP_PRESENT;
8266         }))
8267       CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
8268     // Remove TARGET_PARAM flag from the first element if any.
8269     if (!CurTypes.empty())
8270       CurTypes.front() &= ~OMP_MAP_TARGET_PARAM;
8271 
8272     // All other current entries will be MEMBER_OF the combined entry
8273     // (except for PTR_AND_OBJ entries which do not have a placeholder value
8274     // 0xFFFF in the MEMBER_OF field).
8275     OpenMPOffloadMappingFlags MemberOfFlag =
8276         getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8277     for (auto &M : CurTypes)
8278       setCorrectMemberOfFlag(M, MemberOfFlag);
8279   }
8280 
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). For each entry that relates to a device pointer, the
  /// relevant declaration is attached to its base pointer and the
  /// RETURN_PARAM flag is added to its map type.
  ///
  /// The current directive must be an executable directive (asserted below).
  /// The component lists of map, to and from clauses are first grouped by
  /// canonical declaration, then use_device_ptr and use_device_addr clauses
  /// are matched against them, and finally the entries are generated
  /// declaration by declaration.
  ///
  /// \param CombinedInfo Output to which every generated entry is appended.
  /// \param SkipVarSet Canonical declarations for which the InfoGen helper
  /// records nothing.
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses. The declaration is canonicalized first (a null declaration
    // stays null) and dropped if it is in SkipVarSet.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          const ValueDecl *VD =
              D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
          if (SkipVarSet.count(VD))
            return;
          Info[VD].emplace_back(L, MapType, MapModifiers, MotionModifiers,
                                ReturnDevicePointer, IsImplicit, Mapper, VarRef,
                                ForDeviceAddr);
        };

    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Map clauses: EI walks the variable references in step with the
    // component lists.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        // The expression is only passed on when the clause has a valid map
        // location; it is not correct if the mapping is implicit.
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    // 'to' clauses are recorded with map type 'to' and their motion
    // modifiers.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_to, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    // 'from' clauses are recorded with map type 'from' and their motion
    // modifiers.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_from, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create a zero-size entry flagged RETURN_PARAM.
    // It is the user's fault if that was not mapped before. If there is no
    // map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDevicePtrCombinedInfo;

    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        // If the expression of the last component in the list is a member
        // expression, we have to look into 'this', which maps to null in the
        // map of map information. Otherwise look directly for the
        // information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          // Exclude cases where the base pointer is mapped as array subscript,
          // array section or array shaping. The base address is passed as a
          // pointer to base in this case and cannot be used as a base for
          // use_device_ptr list item.
          if (CI != It->second.end()) {
            // PrevCI is the component right before the last one in the list
            // (or rend() when the list has a single component).
            auto PrevCI = std::next(CI->Components.rbegin());
            const auto *VarD = dyn_cast<VarDecl>(VD);
            if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                isa<MemberExpr>(IE) ||
                !VD->getType().getNonReferenceType()->isPointerType() ||
                PrevCI == CI->Components.rend() ||
                isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                VarD->hasLocalStorage()) {
              CI->ReturnDevicePointer = true;
              continue;
            }
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Components, OMPC_MAP_unknown, llvm::None, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit(), nullptr);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
        } else {
          // Non-member pointer: load its value and use it as both base and
          // section pointer of a zero-size RETURN_PARAM entry. These entries
          // are collected separately and appended at the very end.
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          UseDevicePtrCombinedInfo.Exprs.push_back(VD);
          UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
          UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
          UseDevicePtrCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Look at the use_device_addr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_addr list, we create a zero-size entry flagged RETURN_PARAM.
    // It is the user's fault if that was not mapped before. If there is no
    // map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    // Each declaration is handled at most once (tracked in Processed).
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) {
      for (const auto L : C->component_lists()) {
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        // If the expression of the last component in the list is a member
        // expression, we have to look into 'this', which maps to null in the
        // map of map information. Otherwise look directly for the
        // information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr, nullptr, /*ForDeviceAddr=*/true);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
        } else {
          // Use the address of a glvalue, or the value of the expression
          // otherwise, as both base and section pointer of a zero-size
          // RETURN_PARAM entry appended directly to CombinedInfo.
          llvm::Value *Ptr;
          if (IE->isGLValue())
            Ptr = CGF.EmitLValue(IE).getPointer(CGF);
          else
            Ptr = CGF.EmitScalarExpr(IE);
          CombinedInfo.Exprs.push_back(VD);
          CombinedInfo.BasePointers.emplace_back(Ptr, VD);
          CombinedInfo.Pointers.push_back(Ptr);
          CombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          CombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Generate the entries declaration by declaration, in the order the
    // declarations were inserted into Info.
    for (const auto &M : Info) {
      // Underlying variable declaration used in the map clause.
      const ValueDecl *VD = std::get<0>(M);

      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
        CurInfo.NonContigInfo.IsNonContiguous =
            L.Components.back().isNonContiguous();
        // Note that IsFirstComponentList is always passed as false here.
        generateInfoForComponentList(
            L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, CurInfo,
            PartialStruct, /*IsFirstComponentList=*/false, L.IsImplicit,
            L.Mapper, L.ForDeviceAddr, VD, L.VarRef);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (L.ReturnDevicePointer) {
          assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
              RelevantVD);
          CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(M.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            // use_device_addr: base and section pointer are the same value.
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
          } else {
            // use_device_ptr: the base is the address of the member, the
            // section pointer is the value loaded from it.
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
            // value MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                                    OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDevicePtrCombinedInfo);
  }
8582 
8583   /// Generate all the base pointers, section pointers, sizes, map types, and
8584   /// mappers for the extracted map clauses of user-defined mapper (all included
8585   /// in \a CombinedInfo).
8586   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
8587     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8588            "Expect a declare mapper directive");
8589     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8590     // We have to process the component lists that relate with the same
8591     // declaration in a single chunk so that we can generate the map flags
8592     // correctly. Therefore, we organize all lists in a map.
8593     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8594 
8595     // Fill the information map for map clauses.
8596     for (const auto *C : CurMapperDir->clauselists()) {
8597       const auto *MC = cast<OMPMapClause>(C);
8598       const auto *EI = MC->getVarRefs().begin();
8599       for (const auto L : MC->component_lists()) {
8600         // The Expression is not correct if the mapping is implicit
8601         const Expr *E = (MC->getMapLoc().isValid()) ? *EI : nullptr;
8602         const ValueDecl *VD =
8603             std::get<0>(L) ? cast<ValueDecl>(std::get<0>(L)->getCanonicalDecl())
8604                            : nullptr;
8605         // Get the corresponding user-defined mapper.
8606         Info[VD].emplace_back(std::get<1>(L), MC->getMapType(),
8607                               MC->getMapTypeModifiers(), llvm::None,
8608                               /*ReturnDevicePointer=*/false, MC->isImplicit(),
8609                               std::get<2>(L), E);
8610         ++EI;
8611       }
8612     }
8613 
8614     for (const auto &M : Info) {
8615       // We need to know when we generate information for the first component
8616       // associated with a capture, because the mapping flags depend on it.
8617       bool IsFirstComponentList = true;
8618 
8619       // Underlying variable declaration used in the map clause.
8620       const ValueDecl *VD = std::get<0>(M);
8621 
8622       // Temporary generated information.
8623       MapCombinedInfoTy CurInfo;
8624       StructRangeInfoTy PartialStruct;
8625 
8626       for (const MapInfo &L : M.second) {
8627         assert(!L.Components.empty() &&
8628                "Not expecting declaration with no component lists.");
8629         generateInfoForComponentList(
8630             L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, CurInfo,
8631             PartialStruct, IsFirstComponentList, L.IsImplicit, L.Mapper,
8632             L.ForDeviceAddr, VD, L.VarRef);
8633         IsFirstComponentList = false;
8634       }
8635 
8636       // If there is an entry in PartialStruct it means we have a struct with
8637       // individual members mapped. Emit an extra combined entry.
8638       if (PartialStruct.Base.isValid()) {
8639         CurInfo.NonContigInfo.Dims.push_back(0);
8640         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8641       }
8642 
8643       // We need to append the results of this capture to what we already have.
8644       CombinedInfo.append(CurInfo);
8645     }
8646   }
8647 
8648   /// Emit capture info for lambdas for variables captured by reference.
8649   void generateInfoForLambdaCaptures(
8650       const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8651       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8652     const auto *RD = VD->getType()
8653                          .getCanonicalType()
8654                          .getNonReferenceType()
8655                          ->getAsCXXRecordDecl();
8656     if (!RD || !RD->isLambda())
8657       return;
8658     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8659     LValue VDLVal = CGF.MakeAddrLValue(
8660         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8661     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8662     FieldDecl *ThisCapture = nullptr;
8663     RD->getCaptureFields(Captures, ThisCapture);
8664     if (ThisCapture) {
8665       LValue ThisLVal =
8666           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8667       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8668       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8669                                  VDLVal.getPointer(CGF));
8670       CombinedInfo.Exprs.push_back(VD);
8671       CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8672       CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8673       CombinedInfo.Sizes.push_back(
8674           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8675                                     CGF.Int64Ty, /*isSigned=*/true));
8676       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8677                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8678       CombinedInfo.Mappers.push_back(nullptr);
8679     }
8680     for (const LambdaCapture &LC : RD->captures()) {
8681       if (!LC.capturesVariable())
8682         continue;
8683       const VarDecl *VD = LC.getCapturedVar();
8684       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8685         continue;
8686       auto It = Captures.find(VD);
8687       assert(It != Captures.end() && "Found lambda capture without field.");
8688       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8689       if (LC.getCaptureKind() == LCK_ByRef) {
8690         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8691         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8692                                    VDLVal.getPointer(CGF));
8693         CombinedInfo.Exprs.push_back(VD);
8694         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8695         CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8696         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8697             CGF.getTypeSize(
8698                 VD->getType().getCanonicalType().getNonReferenceType()),
8699             CGF.Int64Ty, /*isSigned=*/true));
8700       } else {
8701         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8702         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8703                                    VDLVal.getPointer(CGF));
8704         CombinedInfo.Exprs.push_back(VD);
8705         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8706         CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8707         CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8708       }
8709       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8710                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8711       CombinedInfo.Mappers.push_back(nullptr);
8712     }
8713   }
8714 
8715   /// Set correct indices for lambdas captures.
8716   void adjustMemberOfForLambdaCaptures(
8717       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8718       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8719       MapFlagsArrayTy &Types) const {
8720     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8721       // Set correct member_of idx for all implicit lambda captures.
8722       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8723                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8724         continue;
8725       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8726       assert(BasePtr && "Unable to find base lambda address.");
8727       int TgtIdx = -1;
8728       for (unsigned J = I; J > 0; --J) {
8729         unsigned Idx = J - 1;
8730         if (Pointers[Idx] != BasePtr)
8731           continue;
8732         TgtIdx = Idx;
8733         break;
8734       }
8735       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8736       // All other current entries will be MEMBER_OF the combined entry
8737       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8738       // 0xFFFF in the MEMBER_OF field).
8739       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8740       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8741     }
8742   }
8743 
8744   /// Generate the base pointers, section pointers, sizes, map types, and
8745   /// mappers associated to a given capture (all included in \a CombinedInfo).
8746   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8747                               llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8748                               StructRangeInfoTy &PartialStruct) const {
8749     assert(!Cap->capturesVariableArrayType() &&
8750            "Not expecting to generate map info for a variable array type!");
8751 
8752     // We need to know when we generating information for the first component
8753     const ValueDecl *VD = Cap->capturesThis()
8754                               ? nullptr
8755                               : Cap->getCapturedVar()->getCanonicalDecl();
8756 
8757     // If this declaration appears in a is_device_ptr clause we just have to
8758     // pass the pointer by value. If it is a reference to a declaration, we just
8759     // pass its value.
8760     if (DevPointersMap.count(VD)) {
8761       CombinedInfo.Exprs.push_back(VD);
8762       CombinedInfo.BasePointers.emplace_back(Arg, VD);
8763       CombinedInfo.Pointers.push_back(Arg);
8764       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8765           CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
8766           /*isSigned=*/true));
8767       CombinedInfo.Types.push_back(
8768           (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
8769           OMP_MAP_TARGET_PARAM);
8770       CombinedInfo.Mappers.push_back(nullptr);
8771       return;
8772     }
8773 
8774     using MapData =
8775         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8776                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
8777                    const ValueDecl *, const Expr *>;
8778     SmallVector<MapData, 4> DeclComponentLists;
8779     assert(CurDir.is<const OMPExecutableDirective *>() &&
8780            "Expect a executable directive");
8781     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8782     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8783       const auto *EI = C->getVarRefs().begin();
8784       for (const auto L : C->decl_component_lists(VD)) {
8785         const ValueDecl *VDecl, *Mapper;
8786         // The Expression is not correct if the mapping is implicit
8787         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8788         OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8789         std::tie(VDecl, Components, Mapper) = L;
8790         assert(VDecl == VD && "We got information for the wrong declaration??");
8791         assert(!Components.empty() &&
8792                "Not expecting declaration with no component lists.");
8793         DeclComponentLists.emplace_back(Components, C->getMapType(),
8794                                         C->getMapTypeModifiers(),
8795                                         C->isImplicit(), Mapper, E);
8796         ++EI;
8797       }
8798     }
8799 
8800     // Find overlapping elements (including the offset from the base element).
8801     llvm::SmallDenseMap<
8802         const MapData *,
8803         llvm::SmallVector<
8804             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8805         4>
8806         OverlappedData;
8807     size_t Count = 0;
8808     for (const MapData &L : DeclComponentLists) {
8809       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8810       OpenMPMapClauseKind MapType;
8811       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8812       bool IsImplicit;
8813       const ValueDecl *Mapper;
8814       const Expr *VarRef;
8815       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8816           L;
8817       ++Count;
8818       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
8819         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8820         std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
8821                  VarRef) = L1;
8822         auto CI = Components.rbegin();
8823         auto CE = Components.rend();
8824         auto SI = Components1.rbegin();
8825         auto SE = Components1.rend();
8826         for (; CI != CE && SI != SE; ++CI, ++SI) {
8827           if (CI->getAssociatedExpression()->getStmtClass() !=
8828               SI->getAssociatedExpression()->getStmtClass())
8829             break;
8830           // Are we dealing with different variables/fields?
8831           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8832             break;
8833         }
8834         // Found overlapping if, at least for one component, reached the head of
8835         // the components list.
8836         if (CI == CE || SI == SE) {
8837           assert((CI != CE || SI != SE) &&
8838                  "Unexpected full match of the mapping components.");
8839           const MapData &BaseData = CI == CE ? L : L1;
8840           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8841               SI == SE ? Components : Components1;
8842           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8843           OverlappedElements.getSecond().push_back(SubData);
8844         }
8845       }
8846     }
8847     // Sort the overlapped elements for each item.
8848     llvm::SmallVector<const FieldDecl *, 4> Layout;
8849     if (!OverlappedData.empty()) {
8850       if (const auto *CRD =
8851               VD->getType().getCanonicalType()->getAsCXXRecordDecl())
8852         getPlainLayout(CRD, Layout, /*AsBase=*/false);
8853       else {
8854         const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
8855         Layout.append(RD->field_begin(), RD->field_end());
8856       }
8857     }
8858     for (auto &Pair : OverlappedData) {
8859       llvm::sort(
8860           Pair.getSecond(),
8861           [&Layout](
8862               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8863               OMPClauseMappableExprCommon::MappableExprComponentListRef
8864                   Second) {
8865             auto CI = First.rbegin();
8866             auto CE = First.rend();
8867             auto SI = Second.rbegin();
8868             auto SE = Second.rend();
8869             for (; CI != CE && SI != SE; ++CI, ++SI) {
8870               if (CI->getAssociatedExpression()->getStmtClass() !=
8871                   SI->getAssociatedExpression()->getStmtClass())
8872                 break;
8873               // Are we dealing with different variables/fields?
8874               if (CI->getAssociatedDeclaration() !=
8875                   SI->getAssociatedDeclaration())
8876                 break;
8877             }
8878 
8879             // Lists contain the same elements.
8880             if (CI == CE && SI == SE)
8881               return false;
8882 
8883             // List with less elements is less than list with more elements.
8884             if (CI == CE || SI == SE)
8885               return CI == CE;
8886 
8887             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8888             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8889             if (FD1->getParent() == FD2->getParent())
8890               return FD1->getFieldIndex() < FD2->getFieldIndex();
8891             const auto It =
8892                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8893                   return FD == FD1 || FD == FD2;
8894                 });
8895             return *It == FD1;
8896           });
8897     }
8898 
8899     // Associated with a capture, because the mapping flags depend on it.
8900     // Go through all of the elements with the overlapped elements.
8901     for (const auto &Pair : OverlappedData) {
8902       const MapData &L = *Pair.getFirst();
8903       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8904       OpenMPMapClauseKind MapType;
8905       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8906       bool IsImplicit;
8907       const ValueDecl *Mapper;
8908       const Expr *VarRef;
8909       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8910           L;
8911       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8912           OverlappedComponents = Pair.getSecond();
8913       bool IsFirstComponentList = true;
8914       generateInfoForComponentList(
8915           MapType, MapModifiers, llvm::None, Components, CombinedInfo,
8916           PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
8917           /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
8918     }
8919     // Go through other elements without overlapped elements.
8920     bool IsFirstComponentList = OverlappedData.empty();
8921     for (const MapData &L : DeclComponentLists) {
8922       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8923       OpenMPMapClauseKind MapType;
8924       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8925       bool IsImplicit;
8926       const ValueDecl *Mapper;
8927       const Expr *VarRef;
8928       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8929           L;
8930       auto It = OverlappedData.find(&L);
8931       if (It == OverlappedData.end())
8932         generateInfoForComponentList(MapType, MapModifiers, llvm::None,
8933                                      Components, CombinedInfo, PartialStruct,
8934                                      IsFirstComponentList, IsImplicit, Mapper,
8935                                      /*ForDeviceAddr=*/false, VD, VarRef);
8936       IsFirstComponentList = false;
8937     }
8938   }
8939 
8940   /// Generate the default map information for a given capture \a CI,
8941   /// record field declaration \a RI and captured value \a CV.
8942   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8943                               const FieldDecl &RI, llvm::Value *CV,
8944                               MapCombinedInfoTy &CombinedInfo) const {
8945     bool IsImplicit = true;
8946     // Do the default mapping.
8947     if (CI.capturesThis()) {
8948       CombinedInfo.Exprs.push_back(nullptr);
8949       CombinedInfo.BasePointers.push_back(CV);
8950       CombinedInfo.Pointers.push_back(CV);
8951       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8952       CombinedInfo.Sizes.push_back(
8953           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8954                                     CGF.Int64Ty, /*isSigned=*/true));
8955       // Default map type.
8956       CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
8957     } else if (CI.capturesVariableByCopy()) {
8958       const VarDecl *VD = CI.getCapturedVar();
8959       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8960       CombinedInfo.BasePointers.push_back(CV);
8961       CombinedInfo.Pointers.push_back(CV);
8962       if (!RI.getType()->isAnyPointerType()) {
8963         // We have to signal to the runtime captures passed by value that are
8964         // not pointers.
8965         CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
8966         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8967             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8968       } else {
8969         // Pointers are implicitly mapped with a zero size and no flags
8970         // (other than first map that is added for all implicit maps).
8971         CombinedInfo.Types.push_back(OMP_MAP_NONE);
8972         CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8973       }
8974       auto I = FirstPrivateDecls.find(VD);
8975       if (I != FirstPrivateDecls.end())
8976         IsImplicit = I->getSecond();
8977     } else {
8978       assert(CI.capturesVariable() && "Expected captured reference.");
8979       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8980       QualType ElementType = PtrTy->getPointeeType();
8981       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8982           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8983       // The default map type for a scalar/complex type is 'to' because by
8984       // default the value doesn't have to be retrieved. For an aggregate
8985       // type, the default is 'tofrom'.
8986       CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
8987       const VarDecl *VD = CI.getCapturedVar();
8988       auto I = FirstPrivateDecls.find(VD);
8989       if (I != FirstPrivateDecls.end() &&
8990           VD->getType().isConstant(CGF.getContext())) {
8991         llvm::Constant *Addr =
8992             CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
8993         // Copy the value of the original variable to the new global copy.
8994         CGF.Builder.CreateMemCpy(
8995             CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
8996             Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
8997             CombinedInfo.Sizes.back(), /*IsVolatile=*/false);
8998         // Use new global variable as the base pointers.
8999         CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9000         CombinedInfo.BasePointers.push_back(Addr);
9001         CombinedInfo.Pointers.push_back(Addr);
9002       } else {
9003         CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9004         CombinedInfo.BasePointers.push_back(CV);
9005         if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9006           Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9007               CV, ElementType, CGF.getContext().getDeclAlign(VD),
9008               AlignmentSource::Decl));
9009           CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
9010         } else {
9011           CombinedInfo.Pointers.push_back(CV);
9012         }
9013       }
9014       if (I != FirstPrivateDecls.end())
9015         IsImplicit = I->getSecond();
9016     }
9017     // Every default map produces a single argument which is a target parameter.
9018     CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
9019 
9020     // Add flag stating this is an implicit map.
9021     if (IsImplicit)
9022       CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
9023 
9024     // No user-defined mapper for default mapping.
9025     CombinedInfo.Mappers.push_back(nullptr);
9026   }
9027 };
9028 } // anonymous namespace
9029 
9030 static void emitNonContiguousDescriptor(
9031     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9032     CGOpenMPRuntime::TargetDataInfo &Info) {
9033   CodeGenModule &CGM = CGF.CGM;
9034   MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
9035       &NonContigInfo = CombinedInfo.NonContigInfo;
9036 
9037   // Build an array of struct descriptor_dim and then assign it to
9038   // offload_args.
9039   //
9040   // struct descriptor_dim {
9041   //  uint64_t offset;
9042   //  uint64_t count;
9043   //  uint64_t stride
9044   // };
9045   ASTContext &C = CGF.getContext();
9046   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9047   RecordDecl *RD;
9048   RD = C.buildImplicitRecord("descriptor_dim");
9049   RD->startDefinition();
9050   addFieldToRecordDecl(C, RD, Int64Ty);
9051   addFieldToRecordDecl(C, RD, Int64Ty);
9052   addFieldToRecordDecl(C, RD, Int64Ty);
9053   RD->completeDefinition();
9054   QualType DimTy = C.getRecordType(RD);
9055 
9056   enum { OffsetFD = 0, CountFD, StrideFD };
9057   // We need two index variable here since the size of "Dims" is the same as the
9058   // size of Components, however, the size of offset, count, and stride is equal
9059   // to the size of base declaration that is non-contiguous.
9060   for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9061     // Skip emitting ir if dimension size is 1 since it cannot be
9062     // non-contiguous.
9063     if (NonContigInfo.Dims[I] == 1)
9064       continue;
9065     llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
9066     QualType ArrayTy =
9067         C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
9068     Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
9069     for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
9070       unsigned RevIdx = EE - II - 1;
9071       LValue DimsLVal = CGF.MakeAddrLValue(
9072           CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
9073       // Offset
9074       LValue OffsetLVal = CGF.EmitLValueForField(
9075           DimsLVal, *std::next(RD->field_begin(), OffsetFD));
9076       CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
9077       // Count
9078       LValue CountLVal = CGF.EmitLValueForField(
9079           DimsLVal, *std::next(RD->field_begin(), CountFD));
9080       CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
9081       // Stride
9082       LValue StrideLVal = CGF.EmitLValueForField(
9083           DimsLVal, *std::next(RD->field_begin(), StrideFD));
9084       CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
9085     }
9086     // args[I] = &dims
9087     Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9088         DimsAddr, CGM.Int8PtrTy);
9089     llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9090         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9091         Info.PointersArray, 0, I);
9092     Address PAddr(P, CGF.getPointerAlign());
9093     CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
9094     ++L;
9095   }
9096 }
9097 
9098 /// Emit a string constant containing the names of the values mapped to the
9099 /// offloading runtime library.
9100 llvm::Constant *
9101 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9102                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9103   llvm::Constant *SrcLocStr;
9104   if (!MapExprs.getMapDecl()) {
9105     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
9106   } else {
9107     std::string ExprName = "";
9108     if (MapExprs.getMapExpr()) {
9109       PrintingPolicy P(CGF.getContext().getLangOpts());
9110       llvm::raw_string_ostream OS(ExprName);
9111       MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9112       OS.flush();
9113     } else {
9114       ExprName = MapExprs.getMapDecl()->getNameAsString();
9115     }
9116 
9117     SourceLocation Loc = MapExprs.getMapDecl()->getLocation();
9118     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9119     const char *FileName = PLoc.getFilename();
9120     unsigned Line = PLoc.getLine();
9121     unsigned Column = PLoc.getColumn();
9122     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(),
9123                                                 Line, Column);
9124   }
9125 
9126   return SrcLocStr;
9127 }
9128 
9129 /// Emit the arrays used to pass the captures and map information to the
9130 /// offloading runtime library. If there is no map or capture information,
9131 /// return nullptr by reference.
9132 static void emitOffloadingArrays(
9133     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9134     CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
9135     bool IsNonContiguous = false) {
9136   CodeGenModule &CGM = CGF.CGM;
9137   ASTContext &Ctx = CGF.getContext();
9138 
9139   // Reset the array information.
9140   Info.clearArrayInfo();
9141   Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
9142 
9143   if (Info.NumberOfPtrs) {
9144     // Detect if we have any capture size requiring runtime evaluation of the
9145     // size so that a constant array could be eventually used.
9146     bool hasRuntimeEvaluationCaptureSize = false;
9147     for (llvm::Value *S : CombinedInfo.Sizes)
9148       if (!isa<llvm::Constant>(S)) {
9149         hasRuntimeEvaluationCaptureSize = true;
9150         break;
9151       }
9152 
9153     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
9154     QualType PointerArrayType = Ctx.getConstantArrayType(
9155         Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
9156         /*IndexTypeQuals=*/0);
9157 
9158     Info.BasePointersArray =
9159         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
9160     Info.PointersArray =
9161         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
9162     Address MappersArray =
9163         CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
9164     Info.MappersArray = MappersArray.getPointer();
9165 
9166     // If we don't have any VLA types or other types that require runtime
9167     // evaluation, we can use a constant array for the map sizes, otherwise we
9168     // need to fill up the arrays as we do for the pointers.
9169     QualType Int64Ty =
9170         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
9171     if (hasRuntimeEvaluationCaptureSize) {
9172       QualType SizeArrayType = Ctx.getConstantArrayType(
9173           Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
9174           /*IndexTypeQuals=*/0);
9175       Info.SizesArray =
9176           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
9177     } else {
9178       // We expect all the sizes to be constant, so we collect them to create
9179       // a constant array.
9180       SmallVector<llvm::Constant *, 16> ConstSizes;
9181       for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
9182         if (IsNonContiguous &&
9183             (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
9184           ConstSizes.push_back(llvm::ConstantInt::get(
9185               CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
9186         } else {
9187           ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
9188         }
9189       }
9190 
9191       auto *SizesArrayInit = llvm::ConstantArray::get(
9192           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
9193       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
9194       auto *SizesArrayGbl = new llvm::GlobalVariable(
9195           CGM.getModule(), SizesArrayInit->getType(),
9196           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9197           SizesArrayInit, Name);
9198       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
9199       Info.SizesArray = SizesArrayGbl;
9200     }
9201 
9202     // The map types are always constant so we don't need to generate code to
9203     // fill arrays. Instead, we create an array constant.
9204     SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
9205     llvm::copy(CombinedInfo.Types, Mapping.begin());
9206     llvm::Constant *MapTypesArrayInit =
9207         llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
9208     std::string MaptypesName =
9209         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
9210     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
9211         CGM.getModule(), MapTypesArrayInit->getType(),
9212         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9213         MapTypesArrayInit, MaptypesName);
9214     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
9215     Info.MapTypesArray = MapTypesArrayGbl;
9216 
9217     // The information types are only built if there is debug information
9218     // requested.
9219     if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
9220       Info.MapNamesArray = llvm::Constant::getNullValue(
9221           llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
9222     } else {
9223       auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9224         return emitMappingInformation(CGF, OMPBuilder, MapExpr);
9225       };
9226       SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
9227       llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
9228 
9229       llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get(
9230           llvm::ArrayType::get(
9231               llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo(),
9232               CombinedInfo.Exprs.size()),
9233           InfoMap);
9234       auto *MapNamesArrayGbl = new llvm::GlobalVariable(
9235           CGM.getModule(), MapNamesArrayInit->getType(),
9236           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9237           MapNamesArrayInit,
9238           CGM.getOpenMPRuntime().getName({"offload_mapnames"}));
9239       Info.MapNamesArray = MapNamesArrayGbl;
9240     }
9241 
9242     // If there's a present map type modifier, it must not be applied to the end
9243     // of a region, so generate a separate map type array in that case.
9244     if (Info.separateBeginEndCalls()) {
9245       bool EndMapTypesDiffer = false;
9246       for (uint64_t &Type : Mapping) {
9247         if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
9248           Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
9249           EndMapTypesDiffer = true;
9250         }
9251       }
9252       if (EndMapTypesDiffer) {
9253         MapTypesArrayInit =
9254             llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
9255         MaptypesName = CGM.getOpenMPRuntime().getName({"offload_maptypes"});
9256         MapTypesArrayGbl = new llvm::GlobalVariable(
9257             CGM.getModule(), MapTypesArrayInit->getType(),
9258             /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9259             MapTypesArrayInit, MaptypesName);
9260         MapTypesArrayGbl->setUnnamedAddr(
9261             llvm::GlobalValue::UnnamedAddr::Global);
9262         Info.MapTypesArrayEnd = MapTypesArrayGbl;
9263       }
9264     }
9265 
9266     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
9267       llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
9268       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
9269           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9270           Info.BasePointersArray, 0, I);
9271       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9272           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
9273       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9274       CGF.Builder.CreateStore(BPVal, BPAddr);
9275 
9276       if (Info.requiresDevicePointerInfo())
9277         if (const ValueDecl *DevVD =
9278                 CombinedInfo.BasePointers[I].getDevicePtrDecl())
9279           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
9280 
9281       llvm::Value *PVal = CombinedInfo.Pointers[I];
9282       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9283           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9284           Info.PointersArray, 0, I);
9285       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9286           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
9287       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9288       CGF.Builder.CreateStore(PVal, PAddr);
9289 
9290       if (hasRuntimeEvaluationCaptureSize) {
9291         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
9292             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9293             Info.SizesArray,
9294             /*Idx0=*/0,
9295             /*Idx1=*/I);
9296         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
9297         CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
9298                                                           CGM.Int64Ty,
9299                                                           /*isSigned=*/true),
9300                                 SAddr);
9301       }
9302 
9303       // Fill up the mapper array.
9304       llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
9305       if (CombinedInfo.Mappers[I]) {
9306         MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
9307             cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9308         MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
9309         Info.HasMapper = true;
9310       }
9311       Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
9312       CGF.Builder.CreateStore(MFunc, MAddr);
9313     }
9314   }
9315 
9316   if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
9317       Info.NumberOfPtrs == 0)
9318     return;
9319 
9320   emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
9321 }
9322 
namespace {
/// Optional settings accepted by emitOffloadingArraysArgument.
struct ArgumentsOptions {
  /// True when the arguments are being produced for the runtime call that
  /// ends a region rather than the one that begins it.
  bool ForEndCall = false;

  /// Default options: arguments for a region-begin call.
  ArgumentsOptions() = default;

  /// Build options with an explicit \p IsForEndCall setting.
  /// NOTE(review): deliberately left non-explicit so that a bare bool or a
  /// braced list keeps converting; confirm the call sites before tightening.
  ArgumentsOptions(bool IsForEndCall) : ForEndCall(IsForEndCall) {}
};
} // namespace
9331 
9332 /// Emit the arguments to be passed to the runtime library based on the
9333 /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
9334 /// ForEndCall, emit map types to be passed for the end of the region instead of
9335 /// the beginning.
9336 static void emitOffloadingArraysArgument(
9337     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
9338     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
9339     llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
9340     llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
9341     const ArgumentsOptions &Options = ArgumentsOptions()) {
9342   assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
9343          "expected region end call to runtime only when end call is separate");
9344   CodeGenModule &CGM = CGF.CGM;
9345   if (Info.NumberOfPtrs) {
9346     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9347         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9348         Info.BasePointersArray,
9349         /*Idx0=*/0, /*Idx1=*/0);
9350     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9351         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9352         Info.PointersArray,
9353         /*Idx0=*/0,
9354         /*Idx1=*/0);
9355     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9356         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9357         /*Idx0=*/0, /*Idx1=*/0);
9358     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9359         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9360         Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
9361                                                     : Info.MapTypesArray,
9362         /*Idx0=*/0,
9363         /*Idx1=*/0);
9364 
9365     // Only emit the mapper information arrays if debug information is
9366     // requested.
9367     if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9368       MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9369     else
9370       MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9371           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9372           Info.MapNamesArray,
9373           /*Idx0=*/0,
9374           /*Idx1=*/0);
9375     // If there is no user-defined mapper, set the mapper array to nullptr to
9376     // avoid an unnecessary data privatization
9377     if (!Info.HasMapper)
9378       MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9379     else
9380       MappersArrayArg =
9381           CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
9382   } else {
9383     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9384     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9385     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9386     MapTypesArrayArg =
9387         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9388     MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9389     MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9390   }
9391 }
9392 
9393 /// Check for inner distribute directive.
9394 static const OMPExecutableDirective *
9395 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9396   const auto *CS = D.getInnermostCapturedStmt();
9397   const auto *Body =
9398       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9399   const Stmt *ChildStmt =
9400       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9401 
9402   if (const auto *NestedDir =
9403           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9404     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9405     switch (D.getDirectiveKind()) {
9406     case OMPD_target:
9407       if (isOpenMPDistributeDirective(DKind))
9408         return NestedDir;
9409       if (DKind == OMPD_teams) {
9410         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9411             /*IgnoreCaptured=*/true);
9412         if (!Body)
9413           return nullptr;
9414         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9415         if (const auto *NND =
9416                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9417           DKind = NND->getDirectiveKind();
9418           if (isOpenMPDistributeDirective(DKind))
9419             return NND;
9420         }
9421       }
9422       return nullptr;
9423     case OMPD_target_teams:
9424       if (isOpenMPDistributeDirective(DKind))
9425         return NestedDir;
9426       return nullptr;
9427     case OMPD_target_parallel:
9428     case OMPD_target_simd:
9429     case OMPD_target_parallel_for:
9430     case OMPD_target_parallel_for_simd:
9431       return nullptr;
9432     case OMPD_target_teams_distribute:
9433     case OMPD_target_teams_distribute_simd:
9434     case OMPD_target_teams_distribute_parallel_for:
9435     case OMPD_target_teams_distribute_parallel_for_simd:
9436     case OMPD_parallel:
9437     case OMPD_for:
9438     case OMPD_parallel_for:
9439     case OMPD_parallel_master:
9440     case OMPD_parallel_sections:
9441     case OMPD_for_simd:
9442     case OMPD_parallel_for_simd:
9443     case OMPD_cancel:
9444     case OMPD_cancellation_point:
9445     case OMPD_ordered:
9446     case OMPD_threadprivate:
9447     case OMPD_allocate:
9448     case OMPD_task:
9449     case OMPD_simd:
9450     case OMPD_sections:
9451     case OMPD_section:
9452     case OMPD_single:
9453     case OMPD_master:
9454     case OMPD_critical:
9455     case OMPD_taskyield:
9456     case OMPD_barrier:
9457     case OMPD_taskwait:
9458     case OMPD_taskgroup:
9459     case OMPD_atomic:
9460     case OMPD_flush:
9461     case OMPD_depobj:
9462     case OMPD_scan:
9463     case OMPD_teams:
9464     case OMPD_target_data:
9465     case OMPD_target_exit_data:
9466     case OMPD_target_enter_data:
9467     case OMPD_distribute:
9468     case OMPD_distribute_simd:
9469     case OMPD_distribute_parallel_for:
9470     case OMPD_distribute_parallel_for_simd:
9471     case OMPD_teams_distribute:
9472     case OMPD_teams_distribute_simd:
9473     case OMPD_teams_distribute_parallel_for:
9474     case OMPD_teams_distribute_parallel_for_simd:
9475     case OMPD_target_update:
9476     case OMPD_declare_simd:
9477     case OMPD_declare_variant:
9478     case OMPD_begin_declare_variant:
9479     case OMPD_end_declare_variant:
9480     case OMPD_declare_target:
9481     case OMPD_end_declare_target:
9482     case OMPD_declare_reduction:
9483     case OMPD_declare_mapper:
9484     case OMPD_taskloop:
9485     case OMPD_taskloop_simd:
9486     case OMPD_master_taskloop:
9487     case OMPD_master_taskloop_simd:
9488     case OMPD_parallel_master_taskloop:
9489     case OMPD_parallel_master_taskloop_simd:
9490     case OMPD_requires:
9491     case OMPD_unknown:
9492     default:
9493       llvm_unreachable("Unexpected directive.");
9494     }
9495   }
9496 
9497   return nullptr;
9498 }
9499 
9500 /// Emit the user-defined mapper function. The code generation follows the
9501 /// pattern in the example below.
9502 /// \code
9503 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9504 ///                                           void *base, void *begin,
9505 ///                                           int64_t size, int64_t type) {
9506 ///   // Allocate space for an array section first.
9507 ///   if (size > 1 && !maptype.IsDelete)
9508 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9509 ///                                 size*sizeof(Ty), clearToFrom(type));
9510 ///   // Map members.
9511 ///   for (unsigned i = 0; i < size; i++) {
9512 ///     // For each component specified by this mapper:
9513 ///     for (auto c : all_components) {
9514 ///       if (c.hasMapper())
9515 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9516 ///                       c.arg_type);
9517 ///       else
9518 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9519 ///                                     c.arg_begin, c.arg_size, c.arg_type);
9520 ///     }
9521 ///   }
9522 ///   // Delete the array section.
9523 ///   if (size > 1 && maptype.IsDelete)
9524 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9525 ///                                 size*sizeof(Ty), clearToFrom(type));
9526 /// }
9527 /// \endcode
9528 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9529                                             CodeGenFunction *CGF) {
9530   if (UDMMap.count(D) > 0)
9531     return;
9532   ASTContext &C = CGM.getContext();
9533   QualType Ty = D->getType();
9534   QualType PtrTy = C.getPointerType(Ty).withRestrict();
9535   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9536   auto *MapperVarDecl =
9537       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9538   SourceLocation Loc = D->getLocation();
9539   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9540 
9541   // Prepare mapper function arguments and attributes.
9542   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9543                               C.VoidPtrTy, ImplicitParamDecl::Other);
9544   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9545                             ImplicitParamDecl::Other);
9546   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9547                              C.VoidPtrTy, ImplicitParamDecl::Other);
9548   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9549                             ImplicitParamDecl::Other);
9550   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9551                             ImplicitParamDecl::Other);
9552   FunctionArgList Args;
9553   Args.push_back(&HandleArg);
9554   Args.push_back(&BaseArg);
9555   Args.push_back(&BeginArg);
9556   Args.push_back(&SizeArg);
9557   Args.push_back(&TypeArg);
9558   const CGFunctionInfo &FnInfo =
9559       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9560   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9561   SmallString<64> TyStr;
9562   llvm::raw_svector_ostream Out(TyStr);
9563   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
9564   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9565   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9566                                     Name, &CGM.getModule());
9567   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9568   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9569   // Start the mapper function code generation.
9570   CodeGenFunction MapperCGF(CGM);
9571   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9572   // Compute the starting and end addreses of array elements.
9573   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9574       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9575       C.getPointerType(Int64Ty), Loc);
9576   // Convert the size in bytes into the number of array elements.
9577   Size = MapperCGF.Builder.CreateExactUDiv(
9578       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9579   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9580       MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
9581       CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
9582   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
9583   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9584       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9585       C.getPointerType(Int64Ty), Loc);
9586   // Prepare common arguments for array initiation and deletion.
9587   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9588       MapperCGF.GetAddrOfLocalVar(&HandleArg),
9589       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9590   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9591       MapperCGF.GetAddrOfLocalVar(&BaseArg),
9592       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9593   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9594       MapperCGF.GetAddrOfLocalVar(&BeginArg),
9595       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9596 
9597   // Emit array initiation if this is an array section and \p MapType indicates
9598   // that memory allocation is required.
9599   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9600   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9601                              ElementSize, HeadBB, /*IsInit=*/true);
9602 
9603   // Emit a for loop to iterate through SizeArg of elements and map all of them.
9604 
9605   // Emit the loop header block.
9606   MapperCGF.EmitBlock(HeadBB);
9607   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9608   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9609   // Evaluate whether the initial condition is satisfied.
9610   llvm::Value *IsEmpty =
9611       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9612   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9613   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9614 
9615   // Emit the loop body block.
9616   MapperCGF.EmitBlock(BodyBB);
9617   llvm::BasicBlock *LastBB = BodyBB;
9618   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9619       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9620   PtrPHI->addIncoming(PtrBegin, EntryBB);
9621   Address PtrCurrent =
9622       Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
9623                           .getAlignment()
9624                           .alignmentOfArrayElement(ElementSize));
9625   // Privatize the declared variable of mapper to be the current array element.
9626   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9627   Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
9628     return MapperCGF
9629         .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
9630         .getAddress(MapperCGF);
9631   });
9632   (void)Scope.Privatize();
9633 
9634   // Get map clause information. Fill up the arrays with all mapped variables.
9635   MappableExprsHandler::MapCombinedInfoTy Info;
9636   MappableExprsHandler MEHandler(*D, MapperCGF);
9637   MEHandler.generateAllInfoForMapper(Info);
9638 
9639   // Call the runtime API __tgt_mapper_num_components to get the number of
9640   // pre-existing components.
9641   llvm::Value *OffloadingArgs[] = {Handle};
9642   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9643       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9644                                             OMPRTL___tgt_mapper_num_components),
9645       OffloadingArgs);
9646   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9647       PreviousSize,
9648       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9649 
9650   // Fill up the runtime mapper handle for all components.
9651   for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
9652     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9653         *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9654     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9655         Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9656     llvm::Value *CurSizeArg = Info.Sizes[I];
9657 
9658     // Extract the MEMBER_OF field from the map type.
9659     llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
9660     MapperCGF.EmitBlock(MemberBB);
9661     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
9662     llvm::Value *Member = MapperCGF.Builder.CreateAnd(
9663         OriMapType,
9664         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
9665     llvm::BasicBlock *MemberCombineBB =
9666         MapperCGF.createBasicBlock("omp.member.combine");
9667     llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
9668     llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
9669     MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
9670     // Add the number of pre-existing components to the MEMBER_OF field if it
9671     // is valid.
9672     MapperCGF.EmitBlock(MemberCombineBB);
9673     llvm::Value *CombinedMember =
9674         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9675     // Do nothing if it is not a member of previous components.
9676     MapperCGF.EmitBlock(TypeBB);
9677     llvm::PHINode *MemberMapType =
9678         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
9679     MemberMapType->addIncoming(OriMapType, MemberBB);
9680     MemberMapType->addIncoming(CombinedMember, MemberCombineBB);
9681 
9682     // Combine the map type inherited from user-defined mapper with that
9683     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9684     // bits of the \a MapType, which is the input argument of the mapper
9685     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9686     // bits of MemberMapType.
9687     // [OpenMP 5.0], 1.2.6. map-type decay.
9688     //        | alloc |  to   | from  | tofrom | release | delete
9689     // ----------------------------------------------------------
9690     // alloc  | alloc | alloc | alloc | alloc  | release | delete
9691     // to     | alloc |  to   | alloc |   to   | release | delete
9692     // from   | alloc | alloc | from  |  from  | release | delete
9693     // tofrom | alloc |  to   | from  | tofrom | release | delete
9694     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9695         MapType,
9696         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
9697                                    MappableExprsHandler::OMP_MAP_FROM));
9698     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9699     llvm::BasicBlock *AllocElseBB =
9700         MapperCGF.createBasicBlock("omp.type.alloc.else");
9701     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9702     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9703     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9704     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9705     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9706     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9707     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9708     MapperCGF.EmitBlock(AllocBB);
9709     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9710         MemberMapType,
9711         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9712                                      MappableExprsHandler::OMP_MAP_FROM)));
9713     MapperCGF.Builder.CreateBr(EndBB);
9714     MapperCGF.EmitBlock(AllocElseBB);
9715     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9716         LeftToFrom,
9717         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
9718     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9719     // In case of to, clear OMP_MAP_FROM.
9720     MapperCGF.EmitBlock(ToBB);
9721     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9722         MemberMapType,
9723         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
9724     MapperCGF.Builder.CreateBr(EndBB);
9725     MapperCGF.EmitBlock(ToElseBB);
9726     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9727         LeftToFrom,
9728         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
9729     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9730     // In case of from, clear OMP_MAP_TO.
9731     MapperCGF.EmitBlock(FromBB);
9732     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9733         MemberMapType,
9734         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
9735     // In case of tofrom, do nothing.
9736     MapperCGF.EmitBlock(EndBB);
9737     LastBB = EndBB;
9738     llvm::PHINode *CurMapType =
9739         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9740     CurMapType->addIncoming(AllocMapType, AllocBB);
9741     CurMapType->addIncoming(ToMapType, ToBB);
9742     CurMapType->addIncoming(FromMapType, FromBB);
9743     CurMapType->addIncoming(MemberMapType, ToElseBB);
9744 
9745     llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9746                                      CurSizeArg, CurMapType};
9747     if (Info.Mappers[I]) {
9748       // Call the corresponding mapper function.
9749       llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
9750           cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
9751       assert(MapperFunc && "Expect a valid mapper function is available.");
9752       MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
9753     } else {
9754       // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9755       // data structure.
9756       MapperCGF.EmitRuntimeCall(
9757           OMPBuilder.getOrCreateRuntimeFunction(
9758               CGM.getModule(), OMPRTL___tgt_push_mapper_component),
9759           OffloadingArgs);
9760     }
9761   }
9762 
9763   // Update the pointer to point to the next element that needs to be mapped,
9764   // and check whether we have mapped all elements.
9765   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9766       PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9767   PtrPHI->addIncoming(PtrNext, LastBB);
9768   llvm::Value *IsDone =
9769       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9770   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9771   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9772 
9773   MapperCGF.EmitBlock(ExitBB);
9774   // Emit array deletion if this is an array section and \p MapType indicates
9775   // that deletion is required.
9776   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9777                              ElementSize, DoneBB, /*IsInit=*/false);
9778 
9779   // Emit the function exit block.
9780   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9781   MapperCGF.FinishFunction();
9782   UDMMap.try_emplace(D, Fn);
9783   if (CGF) {
9784     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9785     Decls.second.push_back(D);
9786   }
9787 }
9788 
9789 /// Emit the array initialization or deletion portion for user-defined mapper
9790 /// code generation. First, it evaluates whether an array section is mapped and
9791 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9792 /// true, and \a MapType indicates to not delete this array, array
9793 /// initialization code is generated. If \a IsInit is false, and \a MapType
9794 /// indicates to not this array, array deletion code is generated.
9795 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9796     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9797     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9798     CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
9799   StringRef Prefix = IsInit ? ".init" : ".del";
9800 
9801   // Evaluate if this is an array section.
9802   llvm::BasicBlock *IsDeleteBB =
9803       MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
9804   llvm::BasicBlock *BodyBB =
9805       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9806   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
9807       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9808   MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);
9809 
9810   // Evaluate if we are going to delete this section.
9811   MapperCGF.EmitBlock(IsDeleteBB);
9812   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9813       MapType,
9814       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9815   llvm::Value *DeleteCond;
9816   if (IsInit) {
9817     DeleteCond = MapperCGF.Builder.CreateIsNull(
9818         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9819   } else {
9820     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9821         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9822   }
9823   MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);
9824 
9825   MapperCGF.EmitBlock(BodyBB);
9826   // Get the array size by multiplying element size and element number (i.e., \p
9827   // Size).
9828   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9829       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9830   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9831   // memory allocation/deletion purpose only.
9832   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9833       MapType,
9834       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9835                                    MappableExprsHandler::OMP_MAP_FROM)));
9836   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9837   // data structure.
9838   llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
9839   MapperCGF.EmitRuntimeCall(
9840       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9841                                             OMPRTL___tgt_push_mapper_component),
9842       OffloadingArgs);
9843 }
9844 
9845 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9846     const OMPDeclareMapperDecl *D) {
9847   auto I = UDMMap.find(D);
9848   if (I != UDMMap.end())
9849     return I->second;
9850   emitUserDefinedMapper(D);
9851   return UDMMap.lookup(D);
9852 }
9853 
9854 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9855     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9856     llvm::Value *DeviceID,
9857     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9858                                      const OMPLoopDirective &D)>
9859         SizeEmitter) {
9860   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9861   const OMPExecutableDirective *TD = &D;
9862   // Get nested teams distribute kind directive, if any.
9863   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9864     TD = getNestedDistributeDirective(CGM.getContext(), D);
9865   if (!TD)
9866     return;
9867   const auto *LD = cast<OMPLoopDirective>(TD);
9868   auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
9869                                                          PrePostActionTy &) {
9870     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9871       llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
9872       llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
9873       CGF.EmitRuntimeCall(
9874           OMPBuilder.getOrCreateRuntimeFunction(
9875               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount),
9876           Args);
9877     }
9878   };
9879   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9880 }
9881 
9882 void CGOpenMPRuntime::emitTargetCall(
9883     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9884     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9885     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9886     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9887                                      const OMPLoopDirective &D)>
9888         SizeEmitter) {
9889   if (!CGF.HaveInsertPoint())
9890     return;
9891 
9892   assert(OutlinedFn && "Invalid outlined function!");
9893 
9894   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
9895                                  D.hasClausesOfKind<OMPNowaitClause>();
9896   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9897   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9898   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9899                                             PrePostActionTy &) {
9900     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9901   };
9902   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9903 
9904   CodeGenFunction::OMPTargetDataInfo InputInfo;
9905   llvm::Value *MapTypesArray = nullptr;
9906   llvm::Value *MapNamesArray = nullptr;
9907   // Fill up the pointer arrays and transfer execution to the device.
9908   auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
9909                     &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
9910                     &CapturedVars,
9911                     SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9912     if (Device.getInt() == OMPC_DEVICE_ancestor) {
9913       // Reverse offloading is not supported, so just execute on the host.
9914       if (RequiresOuterTask) {
9915         CapturedVars.clear();
9916         CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9917       }
9918       emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9919       return;
9920     }
9921 
9922     // On top of the arrays that were filled up, the target offloading call
9923     // takes as arguments the device id as well as the host pointer. The host
9924     // pointer is used by the runtime library to identify the current target
9925     // region, so it only has to be unique and not necessarily point to
9926     // anything. It could be the pointer to the outlined function that
9927     // implements the target region, but we aren't using that so that the
9928     // compiler doesn't need to keep that, and could therefore inline the host
9929     // function if proven worthwhile during optimization.
9930 
9931     // From this point on, we need to have an ID of the target region defined.
9932     assert(OutlinedFnID && "Invalid outlined function ID!");
9933 
9934     // Emit device ID if any.
9935     llvm::Value *DeviceID;
9936     if (Device.getPointer()) {
9937       assert((Device.getInt() == OMPC_DEVICE_unknown ||
9938               Device.getInt() == OMPC_DEVICE_device_num) &&
9939              "Expected device_num modifier.");
9940       llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9941       DeviceID =
9942           CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9943     } else {
9944       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9945     }
9946 
9947     // Emit the number of elements in the offloading arrays.
9948     llvm::Value *PointerNum =
9949         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
9950 
9951     // Return value of the runtime offloading call.
9952     llvm::Value *Return;
9953 
9954     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
9955     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
9956 
9957     // Source location for the ident struct
9958     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
9959 
9960     // Emit tripcount for the target loop-based directive.
9961     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
9962 
9963     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
9964     // The target region is an outlined function launched by the runtime
9965     // via calls __tgt_target() or __tgt_target_teams().
9966     //
9967     // __tgt_target() launches a target region with one team and one thread,
9968     // executing a serial region.  This master thread may in turn launch
9969     // more threads within its team upon encountering a parallel region,
9970     // however, no additional teams can be launched on the device.
9971     //
9972     // __tgt_target_teams() launches a target region with one or more teams,
9973     // each with one or more threads.  This call is required for target
9974     // constructs such as:
9975     //  'target teams'
9976     //  'target' / 'teams'
9977     //  'target teams distribute parallel for'
9978     //  'target parallel'
9979     // and so on.
9980     //
9981     // Note that on the host and CPU targets, the runtime implementation of
9982     // these calls simply call the outlined function without forking threads.
9983     // The outlined functions themselves have runtime calls to
9984     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
9985     // the compiler in emitTeamsCall() and emitParallelCall().
9986     //
9987     // In contrast, on the NVPTX target, the implementation of
9988     // __tgt_target_teams() launches a GPU kernel with the requested number
9989     // of teams and threads so no additional calls to the runtime are required.
9990     if (NumTeams) {
9991       // If we have NumTeams defined this means that we have an enclosed teams
9992       // region. Therefore we also expect to have NumThreads defined. These two
9993       // values should be defined in the presence of a teams directive,
9994       // regardless of having any clauses associated. If the user is using teams
9995       // but no clauses, these two values will be the default that should be
9996       // passed to the runtime library - a 32-bit integer with the value zero.
9997       assert(NumThreads && "Thread limit expression should be available along "
9998                            "with number of teams.");
9999       llvm::Value *OffloadingArgs[] = {RTLoc,
10000                                        DeviceID,
10001                                        OutlinedFnID,
10002                                        PointerNum,
10003                                        InputInfo.BasePointersArray.getPointer(),
10004                                        InputInfo.PointersArray.getPointer(),
10005                                        InputInfo.SizesArray.getPointer(),
10006                                        MapTypesArray,
10007                                        MapNamesArray,
10008                                        InputInfo.MappersArray.getPointer(),
10009                                        NumTeams,
10010                                        NumThreads};
10011       Return = CGF.EmitRuntimeCall(
10012           OMPBuilder.getOrCreateRuntimeFunction(
10013               CGM.getModule(), HasNowait
10014                                    ? OMPRTL___tgt_target_teams_nowait_mapper
10015                                    : OMPRTL___tgt_target_teams_mapper),
10016           OffloadingArgs);
10017     } else {
10018       llvm::Value *OffloadingArgs[] = {RTLoc,
10019                                        DeviceID,
10020                                        OutlinedFnID,
10021                                        PointerNum,
10022                                        InputInfo.BasePointersArray.getPointer(),
10023                                        InputInfo.PointersArray.getPointer(),
10024                                        InputInfo.SizesArray.getPointer(),
10025                                        MapTypesArray,
10026                                        MapNamesArray,
10027                                        InputInfo.MappersArray.getPointer()};
10028       Return = CGF.EmitRuntimeCall(
10029           OMPBuilder.getOrCreateRuntimeFunction(
10030               CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
10031                                          : OMPRTL___tgt_target_mapper),
10032           OffloadingArgs);
10033     }
10034 
10035     // Check the error code and execute the host version if required.
10036     llvm::BasicBlock *OffloadFailedBlock =
10037         CGF.createBasicBlock("omp_offload.failed");
10038     llvm::BasicBlock *OffloadContBlock =
10039         CGF.createBasicBlock("omp_offload.cont");
10040     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
10041     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
10042 
10043     CGF.EmitBlock(OffloadFailedBlock);
10044     if (RequiresOuterTask) {
10045       CapturedVars.clear();
10046       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10047     }
10048     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10049     CGF.EmitBranch(OffloadContBlock);
10050 
10051     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
10052   };
10053 
10054   // Notify that the host version must be executed.
10055   auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
10056                     RequiresOuterTask](CodeGenFunction &CGF,
10057                                        PrePostActionTy &) {
10058     if (RequiresOuterTask) {
10059       CapturedVars.clear();
10060       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10061     }
10062     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10063   };
10064 
10065   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10066                           &MapNamesArray, &CapturedVars, RequiresOuterTask,
10067                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
10068     // Fill up the arrays with all the captured variables.
10069     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10070 
10071     // Get mappable expression information.
10072     MappableExprsHandler MEHandler(D, CGF);
10073     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10074     llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10075 
10076     auto RI = CS.getCapturedRecordDecl()->field_begin();
10077     auto CV = CapturedVars.begin();
10078     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10079                                               CE = CS.capture_end();
10080          CI != CE; ++CI, ++RI, ++CV) {
10081       MappableExprsHandler::MapCombinedInfoTy CurInfo;
10082       MappableExprsHandler::StructRangeInfoTy PartialStruct;
10083 
10084       // VLA sizes are passed to the outlined region by copy and do not have map
10085       // information associated.
10086       if (CI->capturesVariableArrayType()) {
10087         CurInfo.Exprs.push_back(nullptr);
10088         CurInfo.BasePointers.push_back(*CV);
10089         CurInfo.Pointers.push_back(*CV);
10090         CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10091             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10092         // Copy to the device as an argument. No need to retrieve it.
10093         CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
10094                                 MappableExprsHandler::OMP_MAP_TARGET_PARAM |
10095                                 MappableExprsHandler::OMP_MAP_IMPLICIT);
10096         CurInfo.Mappers.push_back(nullptr);
10097       } else {
10098         // If we have any information in the map clause, we use it, otherwise we
10099         // just do a default mapping.
10100         MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
10101         if (!CI->capturesThis())
10102           MappedVarSet.insert(CI->getCapturedVar());
10103         else
10104           MappedVarSet.insert(nullptr);
10105         if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
10106           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10107         // Generate correct mapping for variables captured by reference in
10108         // lambdas.
10109         if (CI->capturesVariable())
10110           MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10111                                                   CurInfo, LambdaPointers);
10112       }
10113       // We expect to have at least an element of information for this capture.
10114       assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
10115              "Non-existing map pointer for capture!");
10116       assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10117              CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10118              CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10119              CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10120              "Inconsistent map information sizes!");
10121 
10122       // If there is an entry in PartialStruct it means we have a struct with
10123       // individual members mapped. Emit an extra combined entry.
10124       if (PartialStruct.Base.isValid())
10125         MEHandler.emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct,
10126                                     nullptr, /*NoTargetParam=*/false);
10127 
10128       // We need to append the results of this capture to what we already have.
10129       CombinedInfo.append(CurInfo);
10130     }
10131     // Adjust MEMBER_OF flags for the lambdas captures.
10132     MEHandler.adjustMemberOfForLambdaCaptures(
10133         LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
10134         CombinedInfo.Types);
10135     // Map any list items in a map clause that were not captures because they
10136     // weren't referenced within the construct.
10137     MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
10138 
10139     TargetDataInfo Info;
10140     // Fill up the arrays and create the arguments.
10141     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
10142     emitOffloadingArraysArgument(
10143         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
10144         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
10145         {/*ForEndTask=*/false});
10146 
10147     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10148     InputInfo.BasePointersArray =
10149         Address(Info.BasePointersArray, CGM.getPointerAlign());
10150     InputInfo.PointersArray =
10151         Address(Info.PointersArray, CGM.getPointerAlign());
10152     InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
10153     InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
10154     MapTypesArray = Info.MapTypesArray;
10155     MapNamesArray = Info.MapNamesArray;
10156     if (RequiresOuterTask)
10157       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10158     else
10159       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10160   };
10161 
10162   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10163                              CodeGenFunction &CGF, PrePostActionTy &) {
10164     if (RequiresOuterTask) {
10165       CodeGenFunction::OMPTargetDataInfo InputInfo;
10166       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10167     } else {
10168       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10169     }
10170   };
10171 
10172   // If we have a target function ID it means that we need to support
10173   // offloading, otherwise, just execute on the host. We need to execute on host
10174   // regardless of the conditional in the if clause if, e.g., the user do not
10175   // specify target triples.
10176   if (OutlinedFnID) {
10177     if (IfCond) {
10178       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10179     } else {
10180       RegionCodeGenTy ThenRCG(TargetThenGen);
10181       ThenRCG(CGF);
10182     }
10183   } else {
10184     RegionCodeGenTy ElseRCG(TargetElseGen);
10185     ElseRCG(CGF);
10186   }
10187 }
10188 
10189 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10190                                                     StringRef ParentName) {
10191   if (!S)
10192     return;
10193 
10194   // Codegen OMP target directives that offload compute to the device.
10195   bool RequiresDeviceCodegen =
10196       isa<OMPExecutableDirective>(S) &&
10197       isOpenMPTargetExecutionDirective(
10198           cast<OMPExecutableDirective>(S)->getDirectiveKind());
10199 
10200   if (RequiresDeviceCodegen) {
10201     const auto &E = *cast<OMPExecutableDirective>(S);
10202     unsigned DeviceID;
10203     unsigned FileID;
10204     unsigned Line;
10205     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
10206                              FileID, Line);
10207 
10208     // Is this a target region that should not be emitted as an entry point? If
10209     // so just signal we are done with this target region.
10210     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
10211                                                             ParentName, Line))
10212       return;
10213 
10214     switch (E.getDirectiveKind()) {
10215     case OMPD_target:
10216       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
10217                                                    cast<OMPTargetDirective>(E));
10218       break;
10219     case OMPD_target_parallel:
10220       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
10221           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
10222       break;
10223     case OMPD_target_teams:
10224       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
10225           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
10226       break;
10227     case OMPD_target_teams_distribute:
10228       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
10229           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
10230       break;
10231     case OMPD_target_teams_distribute_simd:
10232       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
10233           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
10234       break;
10235     case OMPD_target_parallel_for:
10236       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
10237           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
10238       break;
10239     case OMPD_target_parallel_for_simd:
10240       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
10241           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
10242       break;
10243     case OMPD_target_simd:
10244       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
10245           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
10246       break;
10247     case OMPD_target_teams_distribute_parallel_for:
10248       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
10249           CGM, ParentName,
10250           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
10251       break;
10252     case OMPD_target_teams_distribute_parallel_for_simd:
10253       CodeGenFunction::
10254           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
10255               CGM, ParentName,
10256               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
10257       break;
10258     case OMPD_parallel:
10259     case OMPD_for:
10260     case OMPD_parallel_for:
10261     case OMPD_parallel_master:
10262     case OMPD_parallel_sections:
10263     case OMPD_for_simd:
10264     case OMPD_parallel_for_simd:
10265     case OMPD_cancel:
10266     case OMPD_cancellation_point:
10267     case OMPD_ordered:
10268     case OMPD_threadprivate:
10269     case OMPD_allocate:
10270     case OMPD_task:
10271     case OMPD_simd:
10272     case OMPD_sections:
10273     case OMPD_section:
10274     case OMPD_single:
10275     case OMPD_master:
10276     case OMPD_critical:
10277     case OMPD_taskyield:
10278     case OMPD_barrier:
10279     case OMPD_taskwait:
10280     case OMPD_taskgroup:
10281     case OMPD_atomic:
10282     case OMPD_flush:
10283     case OMPD_depobj:
10284     case OMPD_scan:
10285     case OMPD_teams:
10286     case OMPD_target_data:
10287     case OMPD_target_exit_data:
10288     case OMPD_target_enter_data:
10289     case OMPD_distribute:
10290     case OMPD_distribute_simd:
10291     case OMPD_distribute_parallel_for:
10292     case OMPD_distribute_parallel_for_simd:
10293     case OMPD_teams_distribute:
10294     case OMPD_teams_distribute_simd:
10295     case OMPD_teams_distribute_parallel_for:
10296     case OMPD_teams_distribute_parallel_for_simd:
10297     case OMPD_target_update:
10298     case OMPD_declare_simd:
10299     case OMPD_declare_variant:
10300     case OMPD_begin_declare_variant:
10301     case OMPD_end_declare_variant:
10302     case OMPD_declare_target:
10303     case OMPD_end_declare_target:
10304     case OMPD_declare_reduction:
10305     case OMPD_declare_mapper:
10306     case OMPD_taskloop:
10307     case OMPD_taskloop_simd:
10308     case OMPD_master_taskloop:
10309     case OMPD_master_taskloop_simd:
10310     case OMPD_parallel_master_taskloop:
10311     case OMPD_parallel_master_taskloop_simd:
10312     case OMPD_requires:
10313     case OMPD_unknown:
10314     default:
10315       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10316     }
10317     return;
10318   }
10319 
10320   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10321     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10322       return;
10323 
10324     scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10325     return;
10326   }
10327 
10328   // If this is a lambda function, look into its body.
10329   if (const auto *L = dyn_cast<LambdaExpr>(S))
10330     S = L->getBody();
10331 
10332   // Keep looking for target regions recursively.
10333   for (const Stmt *II : S->children())
10334     scanForTargetRegionsFunctions(II, ParentName);
10335 }
10336 
10337 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10338   // If emitting code for the host, we do not process FD here. Instead we do
10339   // the normal code generation.
10340   if (!CGM.getLangOpts().OpenMPIsDevice) {
10341     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
10342       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10343           OMPDeclareTargetDeclAttr::getDeviceType(FD);
10344       // Do not emit device_type(nohost) functions for the host.
10345       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10346         return true;
10347     }
10348     return false;
10349   }
10350 
10351   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10352   // Try to detect target regions in the function.
10353   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10354     StringRef Name = CGM.getMangledName(GD);
10355     scanForTargetRegionsFunctions(FD->getBody(), Name);
10356     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10357         OMPDeclareTargetDeclAttr::getDeviceType(FD);
10358     // Do not emit device_type(nohost) functions for the host.
10359     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10360       return true;
10361   }
10362 
10363   // Do not to emit function if it is not marked as declare target.
10364   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10365          AlreadyEmittedTargetDecls.count(VD) == 0;
10366 }
10367 
10368 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10369   if (!CGM.getLangOpts().OpenMPIsDevice)
10370     return false;
10371 
10372   // Check if there are Ctors/Dtors in this declaration and look for target
10373   // regions in it. We use the complete variant to produce the kernel name
10374   // mangling.
10375   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10376   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10377     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10378       StringRef ParentName =
10379           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10380       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10381     }
10382     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10383       StringRef ParentName =
10384           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10385       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10386     }
10387   }
10388 
10389   // Do not to emit variable if it is not marked as declare target.
10390   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10391       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10392           cast<VarDecl>(GD.getDecl()));
10393   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10394       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10395        HasRequiresUnifiedSharedMemory)) {
10396     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10397     return true;
10398   }
10399   return false;
10400 }
10401 
10402 llvm::Constant *
10403 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
10404                                                 const VarDecl *VD) {
10405   assert(VD->getType().isConstant(CGM.getContext()) &&
10406          "Expected constant variable.");
10407   StringRef VarName;
10408   llvm::Constant *Addr;
10409   llvm::GlobalValue::LinkageTypes Linkage;
10410   QualType Ty = VD->getType();
10411   SmallString<128> Buffer;
10412   {
10413     unsigned DeviceID;
10414     unsigned FileID;
10415     unsigned Line;
10416     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
10417                              FileID, Line);
10418     llvm::raw_svector_ostream OS(Buffer);
10419     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
10420        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
10421     VarName = OS.str();
10422   }
10423   Linkage = llvm::GlobalValue::InternalLinkage;
10424   Addr =
10425       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
10426                                   getDefaultFirstprivateAddressSpace());
10427   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
10428   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
10429   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
10430   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10431       VarName, Addr, VarSize,
10432       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
10433   return Addr;
10434 }
10435 
10436 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10437                                                    llvm::Constant *Addr) {
10438   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10439       !CGM.getLangOpts().OpenMPIsDevice)
10440     return;
10441   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10442       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10443   if (!Res) {
10444     if (CGM.getLangOpts().OpenMPIsDevice) {
10445       // Register non-target variables being emitted in device code (debug info
10446       // may cause this).
10447       StringRef VarName = CGM.getMangledName(VD);
10448       EmittedNonTargetVariables.try_emplace(VarName, Addr);
10449     }
10450     return;
10451   }
10452   // Register declare target variables.
10453   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10454   StringRef VarName;
10455   CharUnits VarSize;
10456   llvm::GlobalValue::LinkageTypes Linkage;
10457 
10458   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10459       !HasRequiresUnifiedSharedMemory) {
10460     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10461     VarName = CGM.getMangledName(VD);
10462     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10463       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10464       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10465     } else {
10466       VarSize = CharUnits::Zero();
10467     }
10468     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10469     // Temp solution to prevent optimizations of the internal variables.
10470     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10471       std::string RefName = getName({VarName, "ref"});
10472       if (!CGM.GetGlobalValue(RefName)) {
10473         llvm::Constant *AddrRef =
10474             getOrCreateInternalVariable(Addr->getType(), RefName);
10475         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10476         GVAddrRef->setConstant(/*Val=*/true);
10477         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10478         GVAddrRef->setInitializer(Addr);
10479         CGM.addCompilerUsedGlobal(GVAddrRef);
10480       }
10481     }
10482   } else {
10483     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10484             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10485              HasRequiresUnifiedSharedMemory)) &&
10486            "Declare target attribute must link or to with unified memory.");
10487     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10488       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10489     else
10490       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10491 
10492     if (CGM.getLangOpts().OpenMPIsDevice) {
10493       VarName = Addr->getName();
10494       Addr = nullptr;
10495     } else {
10496       VarName = getAddrOfDeclareTargetVar(VD).getName();
10497       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10498     }
10499     VarSize = CGM.getPointerSize();
10500     Linkage = llvm::GlobalValue::WeakAnyLinkage;
10501   }
10502 
10503   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10504       VarName, Addr, VarSize, Flags, Linkage);
10505 }
10506 
10507 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10508   if (isa<FunctionDecl>(GD.getDecl()) ||
10509       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10510     return emitTargetFunctions(GD);
10511 
10512   return emitTargetGlobalVariable(GD);
10513 }
10514 
10515 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10516   for (const VarDecl *VD : DeferredGlobalVariables) {
10517     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10518         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10519     if (!Res)
10520       continue;
10521     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10522         !HasRequiresUnifiedSharedMemory) {
10523       CGM.EmitGlobal(VD);
10524     } else {
10525       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10526               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10527                HasRequiresUnifiedSharedMemory)) &&
10528              "Expected link clause or to clause with unified memory.");
10529       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10530     }
10531   }
10532 }
10533 
10534 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10535     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10536   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10537          " Expected target-based directive.");
10538 }
10539 
10540 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10541   for (const OMPClause *Clause : D->clauselists()) {
10542     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10543       HasRequiresUnifiedSharedMemory = true;
10544     } else if (const auto *AC =
10545                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10546       switch (AC->getAtomicDefaultMemOrderKind()) {
10547       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10548         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10549         break;
10550       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10551         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10552         break;
10553       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10554         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10555         break;
10556       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10557         break;
10558       }
10559     }
10560   }
10561 }
10562 
/// Returns the atomic ordering currently stored in RequiresAtomicOrdering,
/// which processRequiresDirective() updates when it sees an
/// 'atomic_default_mem_order' clause.
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10566 
10567 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10568                                                        LangAS &AS) {
10569   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10570     return false;
10571   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10572   switch(A->getAllocatorType()) {
10573   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10574   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10575   // Not supported, fallback to the default mem space.
10576   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10577   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10578   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10579   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10580   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10581   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10582   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10583     AS = LangAS::Default;
10584     return true;
10585   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10586     llvm_unreachable("Expected predefined allocator for the variables with the "
10587                      "static storage.");
10588   }
10589   return false;
10590 }
10591 
/// Returns the HasRequiresUnifiedSharedMemory flag, which
/// processRequiresDirective() sets when it sees a 'unified_shared_memory'
/// clause.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
10595 
10596 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10597     CodeGenModule &CGM)
10598     : CGM(CGM) {
10599   if (CGM.getLangOpts().OpenMPIsDevice) {
10600     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10601     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10602   }
10603 }
10604 
10605 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10606   if (CGM.getLangOpts().OpenMPIsDevice)
10607     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10608 }
10609 
10610 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10611   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10612     return true;
10613 
10614   const auto *D = cast<FunctionDecl>(GD.getDecl());
10615   // Do not to emit function if it is marked as declare target as it was already
10616   // emitted.
10617   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10618     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10619       if (auto *F = dyn_cast_or_null<llvm::Function>(
10620               CGM.GetGlobalValue(CGM.getMangledName(GD))))
10621         return !F->isDeclaration();
10622       return false;
10623     }
10624     return true;
10625   }
10626 
10627   return !AlreadyEmittedTargetDecls.insert(D).second;
10628 }
10629 
10630 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10631   // If we don't have entries or if we are emitting code for the device, we
10632   // don't need to do anything.
10633   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10634       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
10635       (OffloadEntriesInfoManager.empty() &&
10636        !HasEmittedDeclareTargetRegion &&
10637        !HasEmittedTargetRegion))
10638     return nullptr;
10639 
10640   // Create and register the function that handles the requires directives.
10641   ASTContext &C = CGM.getContext();
10642 
10643   llvm::Function *RequiresRegFn;
10644   {
10645     CodeGenFunction CGF(CGM);
10646     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
10647     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
10648     std::string ReqName = getName({"omp_offloading", "requires_reg"});
10649     RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
10650     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
10651     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
10652     // TODO: check for other requires clauses.
10653     // The requires directive takes effect only when a target region is
10654     // present in the compilation unit. Otherwise it is ignored and not
10655     // passed to the runtime. This avoids the runtime from throwing an error
10656     // for mismatching requires clauses across compilation units that don't
10657     // contain at least 1 target region.
10658     assert((HasEmittedTargetRegion ||
10659             HasEmittedDeclareTargetRegion ||
10660             !OffloadEntriesInfoManager.empty()) &&
10661            "Target or declare target region expected.");
10662     if (HasRequiresUnifiedSharedMemory)
10663       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
10664     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10665                             CGM.getModule(), OMPRTL___tgt_register_requires),
10666                         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
10667     CGF.FinishFunction();
10668   }
10669   return RequiresRegFn;
10670 }
10671 
10672 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10673                                     const OMPExecutableDirective &D,
10674                                     SourceLocation Loc,
10675                                     llvm::Function *OutlinedFn,
10676                                     ArrayRef<llvm::Value *> CapturedVars) {
10677   if (!CGF.HaveInsertPoint())
10678     return;
10679 
10680   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10681   CodeGenFunction::RunCleanupsScope Scope(CGF);
10682 
10683   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10684   llvm::Value *Args[] = {
10685       RTLoc,
10686       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10687       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10688   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10689   RealArgs.append(std::begin(Args), std::end(Args));
10690   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10691 
10692   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10693       CGM.getModule(), OMPRTL___kmpc_fork_teams);
10694   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10695 }
10696 
10697 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10698                                          const Expr *NumTeams,
10699                                          const Expr *ThreadLimit,
10700                                          SourceLocation Loc) {
10701   if (!CGF.HaveInsertPoint())
10702     return;
10703 
10704   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10705 
10706   llvm::Value *NumTeamsVal =
10707       NumTeams
10708           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10709                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10710           : CGF.Builder.getInt32(0);
10711 
10712   llvm::Value *ThreadLimitVal =
10713       ThreadLimit
10714           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10715                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10716           : CGF.Builder.getInt32(0);
10717 
10718   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10719   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10720                                      ThreadLimitVal};
10721   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10722                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10723                       PushNumTeamsArgs);
10724 }
10725 
/// Emits the runtime calls that open and close a target data region around
/// the region body produced by \p CodeGen.
///
/// The region is opened with __tgt_target_data_begin_mapper and closed with
/// __tgt_target_data_end_mapper. When \p IfCond is given, both calls are
/// emitted under that condition through emitIfClause().
///
/// \param CGF     Function being emitted; nothing is emitted when it has no
///                insert point.
/// \param D       Directive whose map clauses and begin location are used.
/// \param IfCond  Optional 'if' clause condition; may be null.
/// \param Device  Optional 'device' clause expression; when null,
///                OMP_DEVICEID_UNDEF is passed as the device id.
/// \param CodeGen Generator for the body of the region.
/// \param Info    Filled in by emitOffloadingArrays() while opening the
///                region and read again when closing it.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info);

    // Emit device ID if any; it is passed to the runtime as a signed 64-bit
    // integer.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct.
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info,
                                 {/*ForEndCall=*/true});

    // Emit device ID if any. The device expression is evaluated again here,
    // independently of the evaluation done when opening the region.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct.
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Open the data region, conditionally when an 'if' clause is present.
  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Close the data region, again conditionally when an 'if' clause is present.
  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
10879 
/// Emits the runtime call for a standalone data directive: 'target enter
/// data', 'target exit data' or 'target update'.
///
/// The offloading arrays are built from the map information of \p D. The
/// runtime call itself is emitted through
/// CodeGenFunction::EmitOMPTargetTaskBasedDirective() when \p D has a
/// 'depend' or 'nowait' clause, and inline otherwise. When \p IfCond is
/// given, everything is emitted under that condition.
///
/// \param CGF    Function being emitted; nothing is emitted when it has no
///               insert point.
/// \param D      The standalone directive (asserted to be one of the three
///               kinds above).
/// \param IfCond Optional 'if' clause condition; may be null.
/// \param Device Optional 'device' clause expression; when null,
///               OMP_DEVICEID_UNDEF is passed as the device id.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // State shared between the two lambdas below: TargetThenGen fills these in
  // and ThenGen (which captures them by reference) reads them when it runs.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the runtime call of the standalone directive.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any; it is passed to the runtime as a signed 64-bit
    // integer.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct.
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive; the '_nowait' variant is used when a 'nowait' clause is
    // present.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // Every other directive kind is unexpected here.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Builds the offloading arrays, publishes them through InputInfo /
  // MapTypesArray / MapNamesArray, and then runs ThenGen either inside a
  // target task or inline.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    // A 'depend' or 'nowait' clause means the call is emitted through the
    // task-based path below.
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // With an 'if' clause nothing is emitted on the false branch.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
11056 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  ///
  /// Note that LinearWithVarStride is the first enumerator, so a
  /// value-initialized ParamKindTy (e.g. an empty braced initializer) is
  /// LinearWithVarStride, whereas ParamAttrTy::Kind defaults to Vector.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Classification of the parameter; Vector unless set otherwise.
    ParamKindTy Kind = Vector;
    /// Value the name manglers in this file print after the kind marker:
    /// always for LinearWithVarStride, and for Linear only when it is not 1.
    llvm::APSInt StrideOrArg;
    /// When non-zero, the name manglers in this file append 'a' followed by
    /// this value.
    llvm::APSInt Alignment;
  };
} // namespace
11067 
11068 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11069                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11070   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11071   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11072   // of that clause. The VLEN value must be power of 2.
11073   // In other case the notion of the function`s "characteristic data type" (CDT)
11074   // is used to compute the vector length.
11075   // CDT is defined in the following order:
11076   //   a) For non-void function, the CDT is the return type.
11077   //   b) If the function has any non-uniform, non-linear parameters, then the
11078   //   CDT is the type of the first such parameter.
11079   //   c) If the CDT determined by a) or b) above is struct, union, or class
11080   //   type which is pass-by-value (except for the type that maps to the
11081   //   built-in complex data type), the characteristic data type is int.
11082   //   d) If none of the above three cases is applicable, the CDT is int.
11083   // The VLEN is then determined based on the CDT and the size of vector
11084   // register of that ISA for which current vector version is generated. The
11085   // VLEN is computed using the formula below:
11086   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11087   // where vector register size specified in section 3.2.1 Registers and the
11088   // Stack Frame of original AMD64 ABI document.
11089   QualType RetType = FD->getReturnType();
11090   if (RetType.isNull())
11091     return 0;
11092   ASTContext &C = FD->getASTContext();
11093   QualType CDT;
11094   if (!RetType.isNull() && !RetType->isVoidType()) {
11095     CDT = RetType;
11096   } else {
11097     unsigned Offset = 0;
11098     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11099       if (ParamAttrs[Offset].Kind == Vector)
11100         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11101       ++Offset;
11102     }
11103     if (CDT.isNull()) {
11104       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11105         if (ParamAttrs[I + Offset].Kind == Vector) {
11106           CDT = FD->getParamDecl(I)->getType();
11107           break;
11108         }
11109       }
11110     }
11111   }
11112   if (CDT.isNull())
11113     CDT = C.IntTy;
11114   CDT = CDT->getCanonicalTypeUnqualified();
11115   if (CDT->isRecordType() || CDT->isUnionType())
11116     CDT = C.IntTy;
11117   return C.getTypeSize(CDT);
11118 }
11119 
11120 static void
11121 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11122                            const llvm::APSInt &VLENVal,
11123                            ArrayRef<ParamAttrTy> ParamAttrs,
11124                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11125   struct ISADataTy {
11126     char ISA;
11127     unsigned VecRegSize;
11128   };
11129   ISADataTy ISAData[] = {
11130       {
11131           'b', 128
11132       }, // SSE
11133       {
11134           'c', 256
11135       }, // AVX
11136       {
11137           'd', 256
11138       }, // AVX2
11139       {
11140           'e', 512
11141       }, // AVX512
11142   };
11143   llvm::SmallVector<char, 2> Masked;
11144   switch (State) {
11145   case OMPDeclareSimdDeclAttr::BS_Undefined:
11146     Masked.push_back('N');
11147     Masked.push_back('M');
11148     break;
11149   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11150     Masked.push_back('N');
11151     break;
11152   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11153     Masked.push_back('M');
11154     break;
11155   }
11156   for (char Mask : Masked) {
11157     for (const ISADataTy &Data : ISAData) {
11158       SmallString<256> Buffer;
11159       llvm::raw_svector_ostream Out(Buffer);
11160       Out << "_ZGV" << Data.ISA << Mask;
11161       if (!VLENVal) {
11162         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11163         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11164         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11165       } else {
11166         Out << VLENVal;
11167       }
11168       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11169         switch (ParamAttr.Kind){
11170         case LinearWithVarStride:
11171           Out << 's' << ParamAttr.StrideOrArg;
11172           break;
11173         case Linear:
11174           Out << 'l';
11175           if (ParamAttr.StrideOrArg != 1)
11176             Out << ParamAttr.StrideOrArg;
11177           break;
11178         case Uniform:
11179           Out << 'u';
11180           break;
11181         case Vector:
11182           Out << 'v';
11183           break;
11184         }
11185         if (!!ParamAttr.Alignment)
11186           Out << 'a' << ParamAttr.Alignment;
11187       }
11188       Out << '_' << Fn->getName();
11189       Fn->addFnAttr(Out.str());
11190     }
11191   }
11192 }
11193 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined in
// the "Vector Function ABI specifications for AArch64", available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11199 
11200 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11201 ///
11202 /// TODO: Need to implement the behavior for reference marked with a
11203 /// var or no linear modifiers (1.b in the section). For this, we
11204 /// need to extend ParamKindTy to support the linear modifiers.
11205 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11206   QT = QT.getCanonicalType();
11207 
11208   if (QT->isVoidType())
11209     return false;
11210 
11211   if (Kind == ParamKindTy::Uniform)
11212     return false;
11213 
11214   if (Kind == ParamKindTy::Linear)
11215     return false;
11216 
11217   // TODO: Handle linear references with modifiers
11218 
11219   if (Kind == ParamKindTy::LinearWithVarStride)
11220     return false;
11221 
11222   return true;
11223 }
11224 
11225 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11226 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11227   QT = QT.getCanonicalType();
11228   unsigned Size = C.getTypeSize(QT);
11229 
11230   // Only scalars and complex within 16 bytes wide set PVB to true.
11231   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11232     return false;
11233 
11234   if (QT->isFloatingType())
11235     return true;
11236 
11237   if (QT->isIntegerType())
11238     return true;
11239 
11240   if (QT->isPointerType())
11241     return true;
11242 
11243   // TODO: Add support for complex types (section 3.1.2, item 2).
11244 
11245   return false;
11246 }
11247 
11248 /// Computes the lane size (LS) of a return type or of an input parameter,
11249 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11250 /// TODO: Add support for references, section 3.2.1, item 1.
11251 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11252   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11253     QualType PTy = QT.getCanonicalType()->getPointeeType();
11254     if (getAArch64PBV(PTy, C))
11255       return C.getTypeSize(PTy);
11256   }
11257   if (getAArch64PBV(QT, C))
11258     return C.getTypeSize(QT);
11259 
11260   return C.getTypeSize(C.getUIntPtrType());
11261 }
11262 
11263 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11264 // signature of the scalar function, as defined in 3.2.2 of the
11265 // AAVFABI.
11266 static std::tuple<unsigned, unsigned, bool>
11267 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11268   QualType RetType = FD->getReturnType().getCanonicalType();
11269 
11270   ASTContext &C = FD->getASTContext();
11271 
11272   bool OutputBecomesInput = false;
11273 
11274   llvm::SmallVector<unsigned, 8> Sizes;
11275   if (!RetType->isVoidType()) {
11276     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11277     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11278       OutputBecomesInput = true;
11279   }
11280   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11281     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11282     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11283   }
11284 
11285   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11286   // The LS of a function parameter / return value can only be a power
11287   // of 2, starting from 8 bits, up to 128.
11288   assert(std::all_of(Sizes.begin(), Sizes.end(),
11289                      [](unsigned Size) {
11290                        return Size == 8 || Size == 16 || Size == 32 ||
11291                               Size == 64 || Size == 128;
11292                      }) &&
11293          "Invalid size");
11294 
11295   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11296                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11297                          OutputBecomesInput);
11298 }
11299 
11300 /// Mangle the parameter part of the vector function name according to
11301 /// their OpenMP classification. The mangling function is defined in
11302 /// section 3.5 of the AAVFABI.
11303 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11304   SmallString<256> Buffer;
11305   llvm::raw_svector_ostream Out(Buffer);
11306   for (const auto &ParamAttr : ParamAttrs) {
11307     switch (ParamAttr.Kind) {
11308     case LinearWithVarStride:
11309       Out << "ls" << ParamAttr.StrideOrArg;
11310       break;
11311     case Linear:
11312       Out << 'l';
11313       // Don't print the step value if it is not present or if it is
11314       // equal to 1.
11315       if (ParamAttr.StrideOrArg != 1)
11316         Out << ParamAttr.StrideOrArg;
11317       break;
11318     case Uniform:
11319       Out << 'u';
11320       break;
11321     case Vector:
11322       Out << 'v';
11323       break;
11324     }
11325 
11326     if (!!ParamAttr.Alignment)
11327       Out << 'a' << ParamAttr.Alignment;
11328   }
11329 
11330   return std::string(Out.str());
11331 }
11332 
11333 // Function used to add the attribute. The parameter `VLEN` is
11334 // templated to allow the use of "x" when targeting scalable functions
11335 // for SVE.
11336 template <typename T>
11337 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11338                                  char ISA, StringRef ParSeq,
11339                                  StringRef MangledName, bool OutputBecomesInput,
11340                                  llvm::Function *Fn) {
11341   SmallString<256> Buffer;
11342   llvm::raw_svector_ostream Out(Buffer);
11343   Out << Prefix << ISA << LMask << VLEN;
11344   if (OutputBecomesInput)
11345     Out << "v";
11346   Out << ParSeq << "_" << MangledName;
11347   Fn->addFnAttr(Out.str());
11348 }
11349 
11350 // Helper function to generate the Advanced SIMD names depending on
11351 // the value of the NDS when simdlen is not present.
11352 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11353                                       StringRef Prefix, char ISA,
11354                                       StringRef ParSeq, StringRef MangledName,
11355                                       bool OutputBecomesInput,
11356                                       llvm::Function *Fn) {
11357   switch (NDS) {
11358   case 8:
11359     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11360                          OutputBecomesInput, Fn);
11361     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11362                          OutputBecomesInput, Fn);
11363     break;
11364   case 16:
11365     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11366                          OutputBecomesInput, Fn);
11367     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11368                          OutputBecomesInput, Fn);
11369     break;
11370   case 32:
11371     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11372                          OutputBecomesInput, Fn);
11373     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11374                          OutputBecomesInput, Fn);
11375     break;
11376   case 64:
11377   case 128:
11378     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11379                          OutputBecomesInput, Fn);
11380     break;
11381   default:
11382     llvm_unreachable("Scalar type is too wide.");
11383   }
11384 }
11385 
11386 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11387 static void emitAArch64DeclareSimdFunction(
11388     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11389     ArrayRef<ParamAttrTy> ParamAttrs,
11390     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11391     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11392 
11393   // Get basic data for building the vector signature.
11394   const auto Data = getNDSWDS(FD, ParamAttrs);
11395   const unsigned NDS = std::get<0>(Data);
11396   const unsigned WDS = std::get<1>(Data);
11397   const bool OutputBecomesInput = std::get<2>(Data);
11398 
11399   // Check the values provided via `simdlen` by the user.
11400   // 1. A `simdlen(1)` doesn't produce vector signatures,
11401   if (UserVLEN == 1) {
11402     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11403         DiagnosticsEngine::Warning,
11404         "The clause simdlen(1) has no effect when targeting aarch64.");
11405     CGM.getDiags().Report(SLoc, DiagID);
11406     return;
11407   }
11408 
11409   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11410   // Advanced SIMD output.
11411   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11412     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11413         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11414                                     "power of 2 when targeting Advanced SIMD.");
11415     CGM.getDiags().Report(SLoc, DiagID);
11416     return;
11417   }
11418 
11419   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
11420   // limits.
11421   if (ISA == 's' && UserVLEN != 0) {
11422     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11423       unsigned DiagID = CGM.getDiags().getCustomDiagID(
11424           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11425                                       "lanes in the architectural constraints "
11426                                       "for SVE (min is 128-bit, max is "
11427                                       "2048-bit, by steps of 128-bit)");
11428       CGM.getDiags().Report(SLoc, DiagID) << WDS;
11429       return;
11430     }
11431   }
11432 
11433   // Sort out parameter sequence.
11434   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11435   StringRef Prefix = "_ZGV";
11436   // Generate simdlen from user input (if any).
11437   if (UserVLEN) {
11438     if (ISA == 's') {
11439       // SVE generates only a masked function.
11440       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11441                            OutputBecomesInput, Fn);
11442     } else {
11443       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11444       // Advanced SIMD generates one or two functions, depending on
11445       // the `[not]inbranch` clause.
11446       switch (State) {
11447       case OMPDeclareSimdDeclAttr::BS_Undefined:
11448         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11449                              OutputBecomesInput, Fn);
11450         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11451                              OutputBecomesInput, Fn);
11452         break;
11453       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11454         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11455                              OutputBecomesInput, Fn);
11456         break;
11457       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11458         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11459                              OutputBecomesInput, Fn);
11460         break;
11461       }
11462     }
11463   } else {
11464     // If no user simdlen is provided, follow the AAVFABI rules for
11465     // generating the vector length.
11466     if (ISA == 's') {
11467       // SVE, section 3.4.1, item 1.
11468       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11469                            OutputBecomesInput, Fn);
11470     } else {
11471       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11472       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11473       // two vector names depending on the use of the clause
11474       // `[not]inbranch`.
11475       switch (State) {
11476       case OMPDeclareSimdDeclAttr::BS_Undefined:
11477         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11478                                   OutputBecomesInput, Fn);
11479         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11480                                   OutputBecomesInput, Fn);
11481         break;
11482       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11483         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11484                                   OutputBecomesInput, Fn);
11485         break;
11486       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11487         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11488                                   OutputBecomesInput, Fn);
11489         break;
11490       }
11491     }
11492   }
11493 }
11494 
11495 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11496                                               llvm::Function *Fn) {
11497   ASTContext &C = CGM.getContext();
11498   FD = FD->getMostRecentDecl();
11499   // Map params to their positions in function decl.
11500   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11501   if (isa<CXXMethodDecl>(FD))
11502     ParamPositions.try_emplace(FD, 0);
11503   unsigned ParamPos = ParamPositions.size();
11504   for (const ParmVarDecl *P : FD->parameters()) {
11505     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11506     ++ParamPos;
11507   }
11508   while (FD) {
11509     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11510       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11511       // Mark uniform parameters.
11512       for (const Expr *E : Attr->uniforms()) {
11513         E = E->IgnoreParenImpCasts();
11514         unsigned Pos;
11515         if (isa<CXXThisExpr>(E)) {
11516           Pos = ParamPositions[FD];
11517         } else {
11518           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11519                                 ->getCanonicalDecl();
11520           Pos = ParamPositions[PVD];
11521         }
11522         ParamAttrs[Pos].Kind = Uniform;
11523       }
11524       // Get alignment info.
11525       auto NI = Attr->alignments_begin();
11526       for (const Expr *E : Attr->aligneds()) {
11527         E = E->IgnoreParenImpCasts();
11528         unsigned Pos;
11529         QualType ParmTy;
11530         if (isa<CXXThisExpr>(E)) {
11531           Pos = ParamPositions[FD];
11532           ParmTy = E->getType();
11533         } else {
11534           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11535                                 ->getCanonicalDecl();
11536           Pos = ParamPositions[PVD];
11537           ParmTy = PVD->getType();
11538         }
11539         ParamAttrs[Pos].Alignment =
11540             (*NI)
11541                 ? (*NI)->EvaluateKnownConstInt(C)
11542                 : llvm::APSInt::getUnsigned(
11543                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11544                           .getQuantity());
11545         ++NI;
11546       }
11547       // Mark linear parameters.
11548       auto SI = Attr->steps_begin();
11549       auto MI = Attr->modifiers_begin();
11550       for (const Expr *E : Attr->linears()) {
11551         E = E->IgnoreParenImpCasts();
11552         unsigned Pos;
11553         // Rescaling factor needed to compute the linear parameter
11554         // value in the mangled name.
11555         unsigned PtrRescalingFactor = 1;
11556         if (isa<CXXThisExpr>(E)) {
11557           Pos = ParamPositions[FD];
11558         } else {
11559           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11560                                 ->getCanonicalDecl();
11561           Pos = ParamPositions[PVD];
11562           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11563             PtrRescalingFactor = CGM.getContext()
11564                                      .getTypeSizeInChars(P->getPointeeType())
11565                                      .getQuantity();
11566         }
11567         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11568         ParamAttr.Kind = Linear;
11569         // Assuming a stride of 1, for `linear` without modifiers.
11570         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11571         if (*SI) {
11572           Expr::EvalResult Result;
11573           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11574             if (const auto *DRE =
11575                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11576               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
11577                 ParamAttr.Kind = LinearWithVarStride;
11578                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
11579                     ParamPositions[StridePVD->getCanonicalDecl()]);
11580               }
11581             }
11582           } else {
11583             ParamAttr.StrideOrArg = Result.Val.getInt();
11584           }
11585         }
11586         // If we are using a linear clause on a pointer, we need to
11587         // rescale the value of linear_step with the byte size of the
11588         // pointee type.
11589         if (Linear == ParamAttr.Kind)
11590           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11591         ++SI;
11592         ++MI;
11593       }
11594       llvm::APSInt VLENVal;
11595       SourceLocation ExprLoc;
11596       const Expr *VLENExpr = Attr->getSimdlen();
11597       if (VLENExpr) {
11598         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11599         ExprLoc = VLENExpr->getExprLoc();
11600       }
11601       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11602       if (CGM.getTriple().isX86()) {
11603         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11604       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11605         unsigned VLEN = VLENVal.getExtValue();
11606         StringRef MangledName = Fn->getName();
11607         if (CGM.getTarget().hasFeature("sve"))
11608           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11609                                          MangledName, 's', 128, Fn, ExprLoc);
11610         if (CGM.getTarget().hasFeature("neon"))
11611           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11612                                          MangledName, 'n', 128, Fn, ExprLoc);
11613       }
11614     }
11615     FD = FD->getPreviousDecl();
11616   }
11617 }
11618 
11619 namespace {
11620 /// Cleanup action for doacross support.
11621 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11622 public:
11623   static const int DoacrossFinArgs = 2;
11624 
11625 private:
11626   llvm::FunctionCallee RTLFn;
11627   llvm::Value *Args[DoacrossFinArgs];
11628 
11629 public:
11630   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11631                     ArrayRef<llvm::Value *> CallArgs)
11632       : RTLFn(RTLFn) {
11633     assert(CallArgs.size() == DoacrossFinArgs);
11634     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11635   }
11636   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11637     if (!CGF.HaveInsertPoint())
11638       return;
11639     CGF.EmitRuntimeCall(RTLFn, Args);
11640   }
11641 };
11642 } // namespace
11643 
11644 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11645                                        const OMPLoopDirective &D,
11646                                        ArrayRef<Expr *> NumIterations) {
11647   if (!CGF.HaveInsertPoint())
11648     return;
11649 
11650   ASTContext &C = CGM.getContext();
11651   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11652   RecordDecl *RD;
11653   if (KmpDimTy.isNull()) {
11654     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
11655     //  kmp_int64 lo; // lower
11656     //  kmp_int64 up; // upper
11657     //  kmp_int64 st; // stride
11658     // };
11659     RD = C.buildImplicitRecord("kmp_dim");
11660     RD->startDefinition();
11661     addFieldToRecordDecl(C, RD, Int64Ty);
11662     addFieldToRecordDecl(C, RD, Int64Ty);
11663     addFieldToRecordDecl(C, RD, Int64Ty);
11664     RD->completeDefinition();
11665     KmpDimTy = C.getRecordType(RD);
11666   } else {
11667     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
11668   }
11669   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11670   QualType ArrayTy =
11671       C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
11672 
11673   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
11674   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
11675   enum { LowerFD = 0, UpperFD, StrideFD };
11676   // Fill dims with data.
11677   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11678     LValue DimsLVal = CGF.MakeAddrLValue(
11679         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11680     // dims.upper = num_iterations;
11681     LValue UpperLVal = CGF.EmitLValueForField(
11682         DimsLVal, *std::next(RD->field_begin(), UpperFD));
11683     llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11684         CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
11685         Int64Ty, NumIterations[I]->getExprLoc());
11686     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11687     // dims.stride = 1;
11688     LValue StrideLVal = CGF.EmitLValueForField(
11689         DimsLVal, *std::next(RD->field_begin(), StrideFD));
11690     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11691                           StrideLVal);
11692   }
11693 
11694   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11695   // kmp_int32 num_dims, struct kmp_dim * dims);
11696   llvm::Value *Args[] = {
11697       emitUpdateLocation(CGF, D.getBeginLoc()),
11698       getThreadID(CGF, D.getBeginLoc()),
11699       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11700       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11701           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
11702           CGM.VoidPtrTy)};
11703 
11704   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11705       CGM.getModule(), OMPRTL___kmpc_doacross_init);
11706   CGF.EmitRuntimeCall(RTLFn, Args);
11707   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11708       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11709   llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11710       CGM.getModule(), OMPRTL___kmpc_doacross_fini);
11711   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11712                                              llvm::makeArrayRef(FiniArgs));
11713 }
11714 
11715 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11716                                           const OMPDependClause *C) {
11717   QualType Int64Ty =
11718       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11719   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11720   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11721       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11722   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11723   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11724     const Expr *CounterVal = C->getLoopData(I);
11725     assert(CounterVal);
11726     llvm::Value *CntVal = CGF.EmitScalarConversion(
11727         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11728         CounterVal->getExprLoc());
11729     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11730                           /*Volatile=*/false, Int64Ty);
11731   }
11732   llvm::Value *Args[] = {
11733       emitUpdateLocation(CGF, C->getBeginLoc()),
11734       getThreadID(CGF, C->getBeginLoc()),
11735       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11736   llvm::FunctionCallee RTLFn;
11737   if (C->getDependencyKind() == OMPC_DEPEND_source) {
11738     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11739                                                   OMPRTL___kmpc_doacross_post);
11740   } else {
11741     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11742     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11743                                                   OMPRTL___kmpc_doacross_wait);
11744   }
11745   CGF.EmitRuntimeCall(RTLFn, Args);
11746 }
11747 
11748 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11749                                llvm::FunctionCallee Callee,
11750                                ArrayRef<llvm::Value *> Args) const {
11751   assert(Loc.isValid() && "Outlined function call location must be valid.");
11752   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11753 
11754   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11755     if (Fn->doesNotThrow()) {
11756       CGF.EmitNounwindRuntimeCall(Fn, Args);
11757       return;
11758     }
11759   }
11760   CGF.EmitRuntimeCall(Callee, Args);
11761 }
11762 
/// Emits a call to the outlined function \p OutlinedFn with arguments
/// \p Args at location \p Loc. This implementation forwards directly to
/// emitCall().
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11768 
11769 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11770   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11771     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11772       HasEmittedDeclareTargetRegion = true;
11773 }
11774 
/// Returns the address of the local variable \p NativeParam in \p CGF.
/// \p TargetParam is not used by this implementation.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11780 
/// Returns the address to use for the local variable \p VD, or an invalid
/// address when this function does not provide one.
///
/// Two sources are consulted:
///  - the untied-task local variable map registered for the current function
///    (CGF.CurFn), which may supply a pair of addresses for \p VD;
///  - an OMPAllocateDeclAttr on the canonical declaration. When
///    isAllocatableDecl() accepts \p VD, a call to __kmpc_alloc is emitted
///    and a cleanup calling __kmpc_free is pushed.
///
/// \returns Address::invalid() for a null \p VD; the allocated address (or
/// the untied "real" address when one is registered) for allocatable
/// declarations; otherwise the untied address, which is invalid when \p VD
/// is not in the untied map.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  // Addresses registered for VD in the untied-task map of the current
  // function, if any. Both stay invalid when there is no entry.
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    // Compute the allocation size, rounded up to the declared alignment.
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // The size is only known at run time, so the rounding is emitted as
      // IR.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      // Constant size: round up at compile time.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is a enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    // Arguments of the __kmpc_alloc call: thread id, size, allocator.
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    // Cast the returned pointer to a pointer to the variable's type.
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // When an untied address is registered, the allocated pointer is also
    // stored there.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    // When run, it emits a call to RTLFn with the thread id, the address
    // cast to void* and the allocator value (re-evaluated from the
    // allocator expression).
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      // Raw encoding of the source location used to obtain the thread id.
      unsigned LocEncoding;
      Address Addr;
      const Expr *Allocator;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding,
                           Address Addr, const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is a enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    // The address handed back to the caller (and to the cleanup) is the
    // untied "real" address when one is registered, otherwise the freshly
    // allocated one.
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, AA->getAllocator());
    // With a registered untied "real" address, let the enclosing OpenMP
    // region (if any) emit its untied switch.
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  // No allocate attribute: only the untied address (possibly invalid) is
  // available.
  return UntiedAddr;
}
11883 
11884 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
11885                                              const VarDecl *VD) const {
11886   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11887   if (It == FunctionToUntiedTaskStackMap.end())
11888     return false;
11889   return UntiedLocalVarsStack[It->second].count(VD) > 0;
11890 }
11891 
11892 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11893     CodeGenModule &CGM, const OMPLoopDirective &S)
11894     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11895   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11896   if (!NeedToPush)
11897     return;
11898   NontemporalDeclsSet &DS =
11899       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11900   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11901     for (const Stmt *Ref : C->private_refs()) {
11902       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11903       const ValueDecl *VD;
11904       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11905         VD = DRE->getDecl();
11906       } else {
11907         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11908         assert((ME->isImplicitCXXThis() ||
11909                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11910                "Expected member of current class.");
11911         VD = ME->getMemberDecl();
11912       }
11913       DS.insert(VD);
11914     }
11915   }
11916 }
11917 
11918 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11919   if (!NeedToPush)
11920     return;
11921   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11922 }
11923 
11924 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
11925     CodeGenFunction &CGF,
11926     const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>,
11927                          std::pair<Address, Address>> &LocalVars)
11928     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
11929   if (!NeedToPush)
11930     return;
11931   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
11932       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
11933   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
11934 }
11935 
11936 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
11937   if (!NeedToPush)
11938     return;
11939   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
11940 }
11941 
11942 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11943   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11944 
11945   return llvm::any_of(
11946       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11947       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
11948 }
11949 
11950 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11951     const OMPExecutableDirective &S,
11952     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11953     const {
11954   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11955   // Vars in target/task regions must be excluded completely.
11956   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11957       isOpenMPTaskingDirective(S.getDirectiveKind())) {
11958     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11959     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11960     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11961     for (const CapturedStmt::Capture &Cap : CS->captures()) {
11962       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11963         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11964     }
11965   }
11966   // Exclude vars in private clauses.
11967   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11968     for (const Expr *Ref : C->varlists()) {
11969       if (!Ref->getType()->isScalarType())
11970         continue;
11971       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11972       if (!DRE)
11973         continue;
11974       NeedToCheckForLPCs.insert(DRE->getDecl());
11975     }
11976   }
11977   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11978     for (const Expr *Ref : C->varlists()) {
11979       if (!Ref->getType()->isScalarType())
11980         continue;
11981       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11982       if (!DRE)
11983         continue;
11984       NeedToCheckForLPCs.insert(DRE->getDecl());
11985     }
11986   }
11987   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11988     for (const Expr *Ref : C->varlists()) {
11989       if (!Ref->getType()->isScalarType())
11990         continue;
11991       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11992       if (!DRE)
11993         continue;
11994       NeedToCheckForLPCs.insert(DRE->getDecl());
11995     }
11996   }
11997   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11998     for (const Expr *Ref : C->varlists()) {
11999       if (!Ref->getType()->isScalarType())
12000         continue;
12001       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12002       if (!DRE)
12003         continue;
12004       NeedToCheckForLPCs.insert(DRE->getDecl());
12005     }
12006   }
12007   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12008     for (const Expr *Ref : C->varlists()) {
12009       if (!Ref->getType()->isScalarType())
12010         continue;
12011       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12012       if (!DRE)
12013         continue;
12014       NeedToCheckForLPCs.insert(DRE->getDecl());
12015     }
12016   }
12017   for (const Decl *VD : NeedToCheckForLPCs) {
12018     for (const LastprivateConditionalData &Data :
12019          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12020       if (Data.DeclToUniqueName.count(VD) > 0) {
12021         if (!Data.Disabled)
12022           NeedToAddForLPCsAsDisabled.insert(VD);
12023         break;
12024       }
12025     }
12026   }
12027 }
12028 
12029 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12030     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12031     : CGM(CGF.CGM),
12032       Action((CGM.getLangOpts().OpenMP >= 50 &&
12033               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12034                            [](const OMPLastprivateClause *C) {
12035                              return C->getKind() ==
12036                                     OMPC_LASTPRIVATE_conditional;
12037                            }))
12038                  ? ActionToDo::PushAsLastprivateConditional
12039                  : ActionToDo::DoNotPush) {
12040   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12041   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12042     return;
12043   assert(Action == ActionToDo::PushAsLastprivateConditional &&
12044          "Expected a push action.");
12045   LastprivateConditionalData &Data =
12046       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12047   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12048     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12049       continue;
12050 
12051     for (const Expr *Ref : C->varlists()) {
12052       Data.DeclToUniqueName.insert(std::make_pair(
12053           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12054           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12055     }
12056   }
12057   Data.IVLVal = IVLVal;
12058   Data.Fn = CGF.CurFn;
12059 }
12060 
12061 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12062     CodeGenFunction &CGF, const OMPExecutableDirective &S)
12063     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12064   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12065   if (CGM.getLangOpts().OpenMP < 50)
12066     return;
12067   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12068   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12069   if (!NeedToAddForLPCsAsDisabled.empty()) {
12070     Action = ActionToDo::DisableLastprivateConditional;
12071     LastprivateConditionalData &Data =
12072         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12073     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12074       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12075     Data.Fn = CGF.CurFn;
12076     Data.Disabled = true;
12077   }
12078 }
12079 
/// Factory for the "disable" form of the RAII object: simply returns an
/// object built with the two-argument (CGF, S) constructor.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12085 
12086 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12087   if (CGM.getLangOpts().OpenMP < 50)
12088     return;
12089   if (Action == ActionToDo::DisableLastprivateConditional) {
12090     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12091            "Expected list of disabled private vars.");
12092     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12093   }
12094   if (Action == ActionToDo::PushAsLastprivateConditional) {
12095     assert(
12096         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12097         "Expected list of lastprivate conditional vars.");
12098     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12099   }
12100 }
12101 
12102 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12103                                                         const VarDecl *VD) {
12104   ASTContext &C = CGM.getContext();
12105   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
12106   if (I == LastprivateConditionalToTypes.end())
12107     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12108   QualType NewType;
12109   const FieldDecl *VDField;
12110   const FieldDecl *FiredField;
12111   LValue BaseLVal;
12112   auto VI = I->getSecond().find(VD);
12113   if (VI == I->getSecond().end()) {
12114     RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
12115     RD->startDefinition();
12116     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12117     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12118     RD->completeDefinition();
12119     NewType = C.getRecordType(RD);
12120     Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12121     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12122     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12123   } else {
12124     NewType = std::get<0>(VI->getSecond());
12125     VDField = std::get<1>(VI->getSecond());
12126     FiredField = std::get<2>(VI->getSecond());
12127     BaseLVal = std::get<3>(VI->getSecond());
12128   }
12129   LValue FiredLVal =
12130       CGF.EmitLValueForField(BaseLVal, FiredField);
12131   CGF.EmitStoreOfScalar(
12132       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12133       FiredLVal);
12134   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
12135 }
12136 
12137 namespace {
12138 /// Checks if the lastprivate conditional variable is referenced in LHS.
12139 class LastprivateConditionalRefChecker final
12140     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12141   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12142   const Expr *FoundE = nullptr;
12143   const Decl *FoundD = nullptr;
12144   StringRef UniqueDeclName;
12145   LValue IVLVal;
12146   llvm::Function *FoundFn = nullptr;
12147   SourceLocation Loc;
12148 
12149 public:
12150   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12151     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12152          llvm::reverse(LPM)) {
12153       auto It = D.DeclToUniqueName.find(E->getDecl());
12154       if (It == D.DeclToUniqueName.end())
12155         continue;
12156       if (D.Disabled)
12157         return false;
12158       FoundE = E;
12159       FoundD = E->getDecl()->getCanonicalDecl();
12160       UniqueDeclName = It->second;
12161       IVLVal = D.IVLVal;
12162       FoundFn = D.Fn;
12163       break;
12164     }
12165     return FoundE == E;
12166   }
12167   bool VisitMemberExpr(const MemberExpr *E) {
12168     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12169       return false;
12170     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12171          llvm::reverse(LPM)) {
12172       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12173       if (It == D.DeclToUniqueName.end())
12174         continue;
12175       if (D.Disabled)
12176         return false;
12177       FoundE = E;
12178       FoundD = E->getMemberDecl()->getCanonicalDecl();
12179       UniqueDeclName = It->second;
12180       IVLVal = D.IVLVal;
12181       FoundFn = D.Fn;
12182       break;
12183     }
12184     return FoundE == E;
12185   }
12186   bool VisitStmt(const Stmt *S) {
12187     for (const Stmt *Child : S->children()) {
12188       if (!Child)
12189         continue;
12190       if (const auto *E = dyn_cast<Expr>(Child))
12191         if (!E->isGLValue())
12192           continue;
12193       if (Visit(Child))
12194         return true;
12195     }
12196     return false;
12197   }
12198   explicit LastprivateConditionalRefChecker(
12199       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12200       : LPM(LPM) {}
12201   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12202   getFoundData() const {
12203     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12204   }
12205 };
12206 } // namespace
12207 
/// Emits the update of the global copy of a lastprivate conditional variable.
///
/// Two internal variables are obtained via getOrCreateInternalVariable: one
/// named from \p UniqueDeclName plus "iv" holding the last recorded iteration
/// value, and one named \p UniqueDeclName holding the last recorded value.
/// The emitted code stores the current iteration value (\p IVLVal) and the
/// private value (\p LVal) into them when 'last_iv <= iv'.
///
/// \param CGF Function into which the code is emitted.
/// \param IVLVal LValue of the loop iteration variable.
/// \param UniqueDeclName Name used for the internal variables and for the
///        critical region.
/// \param LVal LValue of the private copy; scalar and complex values are
///        handled, aggregates are marked unreachable.
/// \param Loc Source location used for the emitted loads and the critical
///        region.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    // The comparison is signed or unsigned to match the IV type.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    // NOTE(review): the result is a discarded temporary, so it is destroyed at
    // the end of this statement and presumably does not cover the EmitBlock
    // call below - confirm this is intended.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
12294 
12295 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12296                                                          const Expr *LHS) {
12297   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12298     return;
12299   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12300   if (!Checker.Visit(LHS))
12301     return;
12302   const Expr *FoundE;
12303   const Decl *FoundD;
12304   StringRef UniqueDeclName;
12305   LValue IVLVal;
12306   llvm::Function *FoundFn;
12307   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12308       Checker.getFoundData();
12309   if (FoundFn != CGF.CurFn) {
12310     // Special codegen for inner parallel regions.
12311     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12312     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12313     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12314            "Lastprivate conditional is not found in outer region.");
12315     QualType StructTy = std::get<0>(It->getSecond());
12316     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12317     LValue PrivLVal = CGF.EmitLValue(FoundE);
12318     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12319         PrivLVal.getAddress(CGF),
12320         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
12321     LValue BaseLVal =
12322         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12323     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12324     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12325                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12326                         FiredLVal, llvm::AtomicOrdering::Unordered,
12327                         /*IsVolatile=*/true, /*isInit=*/false);
12328     return;
12329   }
12330 
12331   // Private address of the lastprivate conditional in the current context.
12332   // priv_a
12333   LValue LVal = CGF.EmitLValue(FoundE);
12334   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12335                                    FoundE->getExprLoc());
12336 }
12337 
12338 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12339     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12340     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12341   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12342     return;
12343   auto Range = llvm::reverse(LastprivateConditionalStack);
12344   auto It = llvm::find_if(
12345       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12346   if (It == Range.end() || It->Fn != CGF.CurFn)
12347     return;
12348   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12349   assert(LPCI != LastprivateConditionalToTypes.end() &&
12350          "Lastprivates must be registered already.");
12351   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12352   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12353   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12354   for (const auto &Pair : It->DeclToUniqueName) {
12355     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12356     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
12357       continue;
12358     auto I = LPCI->getSecond().find(Pair.first);
12359     assert(I != LPCI->getSecond().end() &&
12360            "Lastprivate must be rehistered already.");
12361     // bool Cmp = priv_a.Fired != 0;
12362     LValue BaseLVal = std::get<3>(I->getSecond());
12363     LValue FiredLVal =
12364         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12365     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12366     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12367     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12368     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12369     // if (Cmp) {
12370     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12371     CGF.EmitBlock(ThenBB);
12372     Address Addr = CGF.GetAddrOfLocalVar(VD);
12373     LValue LVal;
12374     if (VD->getType()->isReferenceType())
12375       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12376                                            AlignmentSource::Decl);
12377     else
12378       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12379                                 AlignmentSource::Decl);
12380     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12381                                      D.getBeginLoc());
12382     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12383     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12384     // }
12385   }
12386 }
12387 
12388 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12389     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12390     SourceLocation Loc) {
12391   if (CGF.getLangOpts().OpenMP < 50)
12392     return;
12393   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12394   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12395          "Unknown lastprivate conditional variable.");
12396   StringRef UniqueName = It->second;
12397   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12398   // The variable was not updated in the region - exit.
12399   if (!GV)
12400     return;
12401   LValue LPLVal = CGF.MakeAddrLValue(
12402       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
12403   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12404   CGF.EmitStoreOfScalar(Res, PrivLVal);
12405 }
12406 
/// SIMD-only runtime stub for emitParallelOutlinedFunction: never expected to
/// be called; the body is marked with llvm_unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12412 
/// SIMD-only runtime stub for emitTeamsOutlinedFunction: never expected to be
/// called; the body is marked with llvm_unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12418 
/// SIMD-only runtime stub for emitTaskOutlinedFunction: never expected to be
/// called; the body is marked with llvm_unreachable (\p NumberOfParts is not
/// written).
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12426 
/// SIMD-only runtime stub for emitParallelCall: never expected to be called;
/// the body is marked with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12434 
/// SIMD-only runtime stub for emitCriticalRegion: never expected to be called;
/// the body is marked with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12441 
/// SIMD-only runtime stub for emitMasterRegion: never expected to be called;
/// the body is marked with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12447 
/// SIMD-only runtime stub for emitTaskyieldCall: never expected to be called;
/// the body is marked with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12452 
/// SIMD-only runtime stub for emitTaskgroupRegion: never expected to be
/// called; the body is marked with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12458 
/// SIMD-only runtime stub for emitSingleRegion: never expected to be called;
/// the body is marked with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12466 
/// SIMD-only runtime stub for emitOrderedRegion: never expected to be called;
/// the body is marked with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12473 
/// SIMD-only runtime stub for emitBarrierCall: never expected to be called;
/// the body is marked with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12481 
/// SIMD-only runtime stub for emitForDispatchInit: never expected to be
/// called; the body is marked with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12488 
/// SIMD-only runtime stub for emitForStaticInit: never expected to be called;
/// the body is marked with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12494 
/// SIMD-only runtime stub for emitDistributeStaticInit: never expected to be
/// called; the body is marked with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12500 
/// SIMD-only runtime stub for emitForOrderedIterationEnd: never expected to be
/// called; the body is marked with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12507 
/// SIMD-only runtime stub for emitForStaticFinish: never expected to be
/// called; the body is marked with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12513 
/// SIMD-only runtime stub for emitForNext: never expected to be called; the
/// body is marked with llvm_unreachable.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12521 
/// SIMD-only runtime stub for emitNumThreadsClause: never expected to be
/// called; the body is marked with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12527 
/// SIMD-only runtime stub for emitProcBindClause: never expected to be called;
/// the body is marked with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12533 
/// SIMD-only runtime stub for getAddrOfThreadPrivate: never expected to be
/// called; the body is marked with llvm_unreachable.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12540 
/// SIMD-only runtime stub for emitThreadPrivateVarDefinition: never expected
/// to be called; the body is marked with llvm_unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12546 
/// SIMD-only runtime stub for getAddrOfArtificialThreadPrivate: never expected
/// to be called; the body is marked with llvm_unreachable.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12551 
/// SIMD-only runtime stub for emitFlush: never expected to be called; the body
/// is marked with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12558 
/// SIMD-only runtime stub for emitTaskCall: never expected to be called; the
/// body is marked with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12567 
/// SIMD-only runtime stub for emitTaskLoopCall: never expected to be called;
/// the body is marked with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12574 
/// SIMD-only runtime reduction: asserts that \p Options.SimpleReduction is set
/// and then forwards all arguments unchanged to
/// CGOpenMPRuntime::emitReduction.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
12583 
/// SIMD-only runtime stub for emitTaskReductionInit: never expected to be
/// called; the body is marked with llvm_unreachable.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12589 
/// SIMD-only runtime stub for emitTaskReductionFini: never expected to be
/// called; the body is marked with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12595 
/// SIMD-only runtime stub for emitTaskReductionFixups: never expected to be
/// called; the body is marked with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12602 
/// SIMD-only runtime stub for getTaskReductionItem: never expected to be
/// called; the body is marked with llvm_unreachable.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12609 
/// SIMD-only runtime stub for emitTaskwaitCall: never expected to be called;
/// the body is marked with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12614 
/// SIMD-only runtime stub for emitCancellationPointCall: never expected to be
/// called; the body is marked with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12620 
/// SIMD-only runtime stub for emitCancelCall: never expected to be called; the
/// body is marked with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12626 
/// SIMD-only runtime stub for emitTargetOutlinedFunction: never expected to be
/// called; the body is marked with llvm_unreachable (\p OutlinedFn and
/// \p OutlinedFnID are not written).
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12633 
/// SIMD-only runtime stub for emitTargetCall: never expected to be called; the
/// body is marked with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12643 
/// SIMD-only runtime stub for emitTargetFunctions: never expected to be
/// called; the body is marked with llvm_unreachable.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12647 
12648 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
12649   llvm_unreachable("Not supported in SIMD-only mode");
12650 }
12651 
12652 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
12653   return false;
12654 }
12655 
12656 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
12657                                         const OMPExecutableDirective &D,
12658                                         SourceLocation Loc,
12659                                         llvm::Function *OutlinedFn,
12660                                         ArrayRef<llvm::Value *> CapturedVars) {
12661   llvm_unreachable("Not supported in SIMD-only mode");
12662 }
12663 
12664 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
12665                                              const Expr *NumTeams,
12666                                              const Expr *ThreadLimit,
12667                                              SourceLocation Loc) {
12668   llvm_unreachable("Not supported in SIMD-only mode");
12669 }
12670 
12671 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
12672     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12673     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
12674   llvm_unreachable("Not supported in SIMD-only mode");
12675 }
12676 
12677 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
12678     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12679     const Expr *Device) {
12680   llvm_unreachable("Not supported in SIMD-only mode");
12681 }
12682 
12683 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12684                                            const OMPLoopDirective &D,
12685                                            ArrayRef<Expr *> NumIterations) {
12686   llvm_unreachable("Not supported in SIMD-only mode");
12687 }
12688 
12689 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12690                                               const OMPDependClause *C) {
12691   llvm_unreachable("Not supported in SIMD-only mode");
12692 }
12693 
12694 const VarDecl *
12695 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
12696                                         const VarDecl *NativeParam) const {
12697   llvm_unreachable("Not supported in SIMD-only mode");
12698 }
12699 
12700 Address
12701 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
12702                                          const VarDecl *NativeParam,
12703                                          const VarDecl *TargetParam) const {
12704   llvm_unreachable("Not supported in SIMD-only mode");
12705 }
12706