1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Basic/BitmaskEnum.h"
24 #include "clang/Basic/FileManager.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/CodeGen/ConstantInitBuilder.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/SetOperations.h"
30 #include "llvm/ADT/StringExtras.h"
31 #include "llvm/Bitcode/BitcodeReader.h"
32 #include "llvm/IR/Constants.h"
33 #include "llvm/IR/DerivedTypes.h"
34 #include "llvm/IR/GlobalValue.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/Support/AtomicOrdering.h"
37 #include "llvm/Support/Format.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include <cassert>
40 #include <numeric>
41 
42 using namespace clang;
43 using namespace CodeGen;
44 using namespace llvm::omp;
45 
46 namespace {
47 /// Base class for handling code generation inside OpenMP regions.
48 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
49 public:
50   /// Kinds of OpenMP regions used in codegen.
51   enum CGOpenMPRegionKind {
52     /// Region with outlined function for standalone 'parallel'
53     /// directive.
54     ParallelOutlinedRegion,
55     /// Region with outlined function for standalone 'task' directive.
56     TaskOutlinedRegion,
57     /// Region for constructs that do not require function outlining,
58     /// like 'for', 'sections', 'atomic' etc. directives.
59     InlinedRegion,
60     /// Region with outlined function for standalone 'target' directive.
61     TargetRegion,
62   };
63 
64   CGOpenMPRegionInfo(const CapturedStmt &CS,
65                      const CGOpenMPRegionKind RegionKind,
66                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
67                      bool HasCancel)
68       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
69         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
70 
71   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
72                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
73                      bool HasCancel)
74       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
75         Kind(Kind), HasCancel(HasCancel) {}
76 
77   /// Get a variable or parameter for storing global thread id
78   /// inside OpenMP construct.
79   virtual const VarDecl *getThreadIDVariable() const = 0;
80 
81   /// Emit the captured statement body.
82   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
83 
84   /// Get an LValue for the current ThreadID variable.
85   /// \return LValue for thread id variable. This LValue always has type int32*.
86   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
87 
88   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
89 
90   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
91 
92   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
93 
94   bool hasCancel() const { return HasCancel; }
95 
96   static bool classof(const CGCapturedStmtInfo *Info) {
97     return Info->getKind() == CR_OpenMP;
98   }
99 
100   ~CGOpenMPRegionInfo() override = default;
101 
102 protected:
103   CGOpenMPRegionKind RegionKind;
104   RegionCodeGenTy CodeGen;
105   OpenMPDirectiveKind Kind;
106   bool HasCancel;
107 };
108 
109 /// API for captured statement code generation in OpenMP constructs.
110 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
111 public:
112   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
113                              const RegionCodeGenTy &CodeGen,
114                              OpenMPDirectiveKind Kind, bool HasCancel,
115                              StringRef HelperName)
116       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
117                            HasCancel),
118         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
119     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
120   }
121 
122   /// Get a variable or parameter for storing global thread id
123   /// inside OpenMP construct.
124   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
125 
126   /// Get the name of the capture helper.
127   StringRef getHelperName() const override { return HelperName; }
128 
129   static bool classof(const CGCapturedStmtInfo *Info) {
130     return CGOpenMPRegionInfo::classof(Info) &&
131            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
132                ParallelOutlinedRegion;
133   }
134 
135 private:
136   /// A variable or parameter storing global thread id for OpenMP
137   /// constructs.
138   const VarDecl *ThreadIDVar;
139   StringRef HelperName;
140 };
141 
142 /// API for captured statement code generation in OpenMP constructs.
143 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
144 public:
145   class UntiedTaskActionTy final : public PrePostActionTy {
146     bool Untied;
147     const VarDecl *PartIDVar;
148     const RegionCodeGenTy UntiedCodeGen;
149     llvm::SwitchInst *UntiedSwitch = nullptr;
150 
151   public:
152     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
153                        const RegionCodeGenTy &UntiedCodeGen)
154         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
155     void Enter(CodeGenFunction &CGF) override {
156       if (Untied) {
157         // Emit task switching point.
158         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
159             CGF.GetAddrOfLocalVar(PartIDVar),
160             PartIDVar->getType()->castAs<PointerType>());
161         llvm::Value *Res =
162             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
163         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
164         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
165         CGF.EmitBlock(DoneBB);
166         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
167         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
168         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
169                               CGF.Builder.GetInsertBlock());
170         emitUntiedSwitch(CGF);
171       }
172     }
173     void emitUntiedSwitch(CodeGenFunction &CGF) const {
174       if (Untied) {
175         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
176             CGF.GetAddrOfLocalVar(PartIDVar),
177             PartIDVar->getType()->castAs<PointerType>());
178         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
179                               PartIdLVal);
180         UntiedCodeGen(CGF);
181         CodeGenFunction::JumpDest CurPoint =
182             CGF.getJumpDestInCurrentScope(".untied.next.");
183         CGF.EmitBranch(CGF.ReturnBlock.getBlock());
184         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
185         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
186                               CGF.Builder.GetInsertBlock());
187         CGF.EmitBranchThroughCleanup(CurPoint);
188         CGF.EmitBlock(CurPoint.getBlock());
189       }
190     }
191     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
192   };
193   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
194                                  const VarDecl *ThreadIDVar,
195                                  const RegionCodeGenTy &CodeGen,
196                                  OpenMPDirectiveKind Kind, bool HasCancel,
197                                  const UntiedTaskActionTy &Action)
198       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
199         ThreadIDVar(ThreadIDVar), Action(Action) {
200     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
201   }
202 
203   /// Get a variable or parameter for storing global thread id
204   /// inside OpenMP construct.
205   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
206 
207   /// Get an LValue for the current ThreadID variable.
208   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
209 
210   /// Get the name of the capture helper.
211   StringRef getHelperName() const override { return ".omp_outlined."; }
212 
213   void emitUntiedSwitch(CodeGenFunction &CGF) override {
214     Action.emitUntiedSwitch(CGF);
215   }
216 
217   static bool classof(const CGCapturedStmtInfo *Info) {
218     return CGOpenMPRegionInfo::classof(Info) &&
219            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
220                TaskOutlinedRegion;
221   }
222 
223 private:
224   /// A variable or parameter storing global thread id for OpenMP
225   /// constructs.
226   const VarDecl *ThreadIDVar;
227   /// Action for emitting code for untied tasks.
228   const UntiedTaskActionTy &Action;
229 };
230 
231 /// API for inlined captured statement code generation in OpenMP
232 /// constructs.
233 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
234 public:
235   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
236                             const RegionCodeGenTy &CodeGen,
237                             OpenMPDirectiveKind Kind, bool HasCancel)
238       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
239         OldCSI(OldCSI),
240         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
241 
242   // Retrieve the value of the context parameter.
243   llvm::Value *getContextValue() const override {
244     if (OuterRegionInfo)
245       return OuterRegionInfo->getContextValue();
246     llvm_unreachable("No context value for inlined OpenMP region");
247   }
248 
249   void setContextValue(llvm::Value *V) override {
250     if (OuterRegionInfo) {
251       OuterRegionInfo->setContextValue(V);
252       return;
253     }
254     llvm_unreachable("No context value for inlined OpenMP region");
255   }
256 
257   /// Lookup the captured field decl for a variable.
258   const FieldDecl *lookup(const VarDecl *VD) const override {
259     if (OuterRegionInfo)
260       return OuterRegionInfo->lookup(VD);
261     // If there is no outer outlined region,no need to lookup in a list of
262     // captured variables, we can use the original one.
263     return nullptr;
264   }
265 
266   FieldDecl *getThisFieldDecl() const override {
267     if (OuterRegionInfo)
268       return OuterRegionInfo->getThisFieldDecl();
269     return nullptr;
270   }
271 
272   /// Get a variable or parameter for storing global thread id
273   /// inside OpenMP construct.
274   const VarDecl *getThreadIDVariable() const override {
275     if (OuterRegionInfo)
276       return OuterRegionInfo->getThreadIDVariable();
277     return nullptr;
278   }
279 
280   /// Get an LValue for the current ThreadID variable.
281   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
282     if (OuterRegionInfo)
283       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
284     llvm_unreachable("No LValue for inlined OpenMP construct");
285   }
286 
287   /// Get the name of the capture helper.
288   StringRef getHelperName() const override {
289     if (auto *OuterRegionInfo = getOldCSI())
290       return OuterRegionInfo->getHelperName();
291     llvm_unreachable("No helper name for inlined OpenMP construct");
292   }
293 
294   void emitUntiedSwitch(CodeGenFunction &CGF) override {
295     if (OuterRegionInfo)
296       OuterRegionInfo->emitUntiedSwitch(CGF);
297   }
298 
299   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
300 
301   static bool classof(const CGCapturedStmtInfo *Info) {
302     return CGOpenMPRegionInfo::classof(Info) &&
303            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
304   }
305 
306   ~CGOpenMPInlinedRegionInfo() override = default;
307 
308 private:
309   /// CodeGen info about outer OpenMP region.
310   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
311   CGOpenMPRegionInfo *OuterRegionInfo;
312 };
313 
314 /// API for captured statement code generation in OpenMP target
315 /// constructs. For this captures, implicit parameters are used instead of the
316 /// captured fields. The name of the target region has to be unique in a given
317 /// application so it is provided by the client, because only the client has
318 /// the information to generate that.
319 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
320 public:
321   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
322                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
323       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
324                            /*HasCancel=*/false),
325         HelperName(HelperName) {}
326 
327   /// This is unused for target regions because each starts executing
328   /// with a single thread.
329   const VarDecl *getThreadIDVariable() const override { return nullptr; }
330 
331   /// Get the name of the capture helper.
332   StringRef getHelperName() const override { return HelperName; }
333 
334   static bool classof(const CGCapturedStmtInfo *Info) {
335     return CGOpenMPRegionInfo::classof(Info) &&
336            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
337   }
338 
339 private:
340   StringRef HelperName;
341 };
342 
/// Placeholder code generation callback that must never be invoked; it is the
/// callback handed to CGOpenMPInlinedRegionInfo by CGOpenMPInnerExprInfo,
/// which emits expressions only.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
346 /// API for generation of expressions captured in a innermost OpenMP
347 /// region.
348 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
349 public:
350   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
351       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
352                                   OMPD_unknown,
353                                   /*HasCancel=*/false),
354         PrivScope(CGF) {
355     // Make sure the globals captured in the provided statement are local by
356     // using the privatization logic. We assume the same variable is not
357     // captured more than once.
358     for (const auto &C : CS.captures()) {
359       if (!C.capturesVariable() && !C.capturesVariableByCopy())
360         continue;
361 
362       const VarDecl *VD = C.getCapturedVar();
363       if (VD->isLocalVarDeclOrParm())
364         continue;
365 
366       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
367                       /*RefersToEnclosingVariableOrCapture=*/false,
368                       VD->getType().getNonReferenceType(), VK_LValue,
369                       C.getLocation());
370       PrivScope.addPrivate(
371           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
372     }
373     (void)PrivScope.Privatize();
374   }
375 
376   /// Lookup the captured field decl for a variable.
377   const FieldDecl *lookup(const VarDecl *VD) const override {
378     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
379       return FD;
380     return nullptr;
381   }
382 
383   /// Emit the captured statement body.
384   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
385     llvm_unreachable("No body for expressions");
386   }
387 
388   /// Get a variable or parameter for storing global thread id
389   /// inside OpenMP construct.
390   const VarDecl *getThreadIDVariable() const override {
391     llvm_unreachable("No thread id for expressions");
392   }
393 
394   /// Get the name of the capture helper.
395   StringRef getHelperName() const override {
396     llvm_unreachable("No helper name for expressions");
397   }
398 
399   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
400 
401 private:
402   /// Private scope to capture global variables.
403   CodeGenFunction::OMPPrivateScope PrivScope;
404 };
405 
406 /// RAII for emitting code of OpenMP constructs.
407 class InlinedOpenMPRegionRAII {
408   CodeGenFunction &CGF;
409   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
410   FieldDecl *LambdaThisCaptureField = nullptr;
411   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
412 
413 public:
414   /// Constructs region for combined constructs.
415   /// \param CodeGen Code generation sequence for combined directives. Includes
416   /// a list of functions used for code generation of implicitly inlined
417   /// regions.
418   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
419                           OpenMPDirectiveKind Kind, bool HasCancel)
420       : CGF(CGF) {
421     // Start emission for the construct.
422     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
423         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
424     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
425     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
426     CGF.LambdaThisCaptureField = nullptr;
427     BlockInfo = CGF.BlockInfo;
428     CGF.BlockInfo = nullptr;
429   }
430 
431   ~InlinedOpenMPRegionRAII() {
432     // Restore original CapturedStmtInfo only if we're done with code emission.
433     auto *OldCSI =
434         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
435     delete CGF.CapturedStmtInfo;
436     CGF.CapturedStmtInfo = OldCSI;
437     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
438     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
439     CGF.BlockInfo = BlockInfo;
440   }
441 };
442 
443 /// Values for bit flags used in the ident_t to describe the fields.
444 /// All enumeric elements are named and described in accordance with the code
445 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
446 enum OpenMPLocationFlags : unsigned {
447   /// Use trampoline for internal microtask.
448   OMP_IDENT_IMD = 0x01,
449   /// Use c-style ident structure.
450   OMP_IDENT_KMPC = 0x02,
451   /// Atomic reduction option for kmpc_reduce.
452   OMP_ATOMIC_REDUCE = 0x10,
453   /// Explicit 'barrier' directive.
454   OMP_IDENT_BARRIER_EXPL = 0x20,
455   /// Implicit barrier in code.
456   OMP_IDENT_BARRIER_IMPL = 0x40,
457   /// Implicit barrier in 'for' directive.
458   OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
459   /// Implicit barrier in 'sections' directive.
460   OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
461   /// Implicit barrier in 'single' directive.
462   OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
463   /// Call of __kmp_for_static_init for static loop.
464   OMP_IDENT_WORK_LOOP = 0x200,
465   /// Call of __kmp_for_static_init for sections.
466   OMP_IDENT_WORK_SECTIONS = 0x400,
467   /// Call of __kmp_for_static_init for distribute.
468   OMP_IDENT_WORK_DISTRIBUTE = 0x800,
469   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
470 };
471 
472 namespace {
473 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
474 /// Values for bit flags for marking which requires clauses have been used.
475 enum OpenMPOffloadingRequiresDirFlags : int64_t {
476   /// flag undefined.
477   OMP_REQ_UNDEFINED               = 0x000,
478   /// no requires clause present.
479   OMP_REQ_NONE                    = 0x001,
480   /// reverse_offload clause.
481   OMP_REQ_REVERSE_OFFLOAD         = 0x002,
482   /// unified_address clause.
483   OMP_REQ_UNIFIED_ADDRESS         = 0x004,
484   /// unified_shared_memory clause.
485   OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
486   /// dynamic_allocators clause.
487   OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
488   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
489 };
490 
491 enum OpenMPOffloadingReservedDeviceIDs {
492   /// Device ID if the device was not defined, runtime should get it
493   /// from environment variables in the spec.
494   OMP_DEVICEID_UNDEF = -1,
495 };
496 } // anonymous namespace
497 
/// Indices of the fields of the ident structure that describes a source
/// location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// reserved_1: might be used in Fortran.
  IdentField_Reserved_1 = 0,
  /// flags: OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags = 1,
  /// reserved_2: not really used in Fortran any more.
  IdentField_Reserved_2 = 2,
  /// reserved_3: source[4] in Fortran, do not use for C++.
  IdentField_Reserved_3 = 3,
  /// psource: string describing the source location. It is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource = 4
};
538 
/// Schedule kinds for 'omp for' loops. The enumerators are taken from the
/// enum sched_type in kmp.h.
enum OpenMPSchedType {
  // Default (unordered) versions.
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// Static with chunk adjustment (e.g., simd).
  OMP_sch_static_balanced_chunked = 45,
  /// Schedule used by default: plain static.
  OMP_sch_default = OMP_sch_static,

  // 'ordered' versions.
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,

  // dist_schedule types.
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,

  // Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = 1 << 29,
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = 1 << 30,
};
570 
571 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
572 /// region.
573 class CleanupTy final : public EHScopeStack::Cleanup {
574   PrePostActionTy *Action;
575 
576 public:
577   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
578   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
579     if (!CGF.HaveInsertPoint())
580       return;
581     Action->Exit(CGF);
582   }
583 };
584 
585 } // anonymous namespace
586 
587 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
588   CodeGenFunction::RunCleanupsScope Scope(CGF);
589   if (PrePostAction) {
590     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
591     Callback(CodeGen, CGF, *PrePostAction);
592   } else {
593     PrePostActionTy Action;
594     Callback(CodeGen, CGF, Action);
595   }
596 }
597 
598 /// Check if the combiner is a call to UDR combiner and if it is so return the
599 /// UDR decl used for reduction.
600 static const OMPDeclareReductionDecl *
601 getReductionInit(const Expr *ReductionOp) {
602   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
603     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
604       if (const auto *DRE =
605               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
606         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
607           return DRD;
608   return nullptr;
609 }
610 
611 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
612                                              const OMPDeclareReductionDecl *DRD,
613                                              const Expr *InitOp,
614                                              Address Private, Address Original,
615                                              QualType Ty) {
616   if (DRD->getInitializer()) {
617     std::pair<llvm::Function *, llvm::Function *> Reduction =
618         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
619     const auto *CE = cast<CallExpr>(InitOp);
620     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
621     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
622     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
623     const auto *LHSDRE =
624         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
625     const auto *RHSDRE =
626         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
627     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
628     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
629                             [=]() { return Private; });
630     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
631                             [=]() { return Original; });
632     (void)PrivateScope.Privatize();
633     RValue Func = RValue::get(Reduction.second);
634     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
635     CGF.EmitIgnoredExpr(InitOp);
636   } else {
637     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
638     std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
639     auto *GV = new llvm::GlobalVariable(
640         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
641         llvm::GlobalValue::PrivateLinkage, Init, Name);
642     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
643     RValue InitRVal;
644     switch (CGF.getEvaluationKind(Ty)) {
645     case TEK_Scalar:
646       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
647       break;
648     case TEK_Complex:
649       InitRVal =
650           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
651       break;
652     case TEK_Aggregate:
653       InitRVal = RValue::getAggregate(LV.getAddress(CGF));
654       break;
655     }
656     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
657     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
658     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
659                          /*IsInitializer=*/false);
660   }
661 }
662 
/// Emit initialization of arrays of complex types.
/// The generated code is a loop that visits every base element of the
/// destination array, preceded by a check that skips the loop when the array
/// is empty.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, each element is initialized with
/// emitInitWithReductionInitializer(); otherwise \p Init is emitted directly
/// into each element.
/// \param Init Initial expression of array.
/// \param DRD User-defined reduction declaration, may be null. Only when it
/// is non-null is \p SrcAddr used and advanced in step with the destination.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  // NOTE(review): this casts DestAddr to its own element type; presumably
  // emitArrayLength() has already rewritten DestAddr to point at the base
  // element -- confirm.
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  // The source is viewed with the same element type as the destination.
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // One-past-the-end pointer of the destination: begin + number of elements.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely if the array has no elements.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  // EntryBB is remembered as the predecessor the PHI nodes below receive
  // their initial values from.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // Current source element: SrcBegin on entry, the advanced pointer on each
  // back edge. Only created when a reduction declaration is given.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  // Current destination element: DestBegin on entry, the advanced pointer on
  // each back edge.
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Cleanups created while initializing one element are run inside the
    // loop iteration.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the value name below says "dest" although this is the
    // source pointer; it is the same string as the one used for the
    // destination further down.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The back edge originates from whatever block the builder is in now.
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
751 
/// Emits the lvalue of the shared reduction item \p E by forwarding to
/// CodeGenFunction::EmitOMPSharedLValue.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
755 
756 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
757                                             const Expr *E) {
758   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
759     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
760   return LValue();
761 }
762 
763 void ReductionCodeGen::emitAggregateInitialization(
764     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
765     const OMPDeclareReductionDecl *DRD) {
766   // Emit VarDecl with copy init for arrays.
767   // Get the address of the original variable captured in current
768   // captured region.
769   const auto *PrivateVD =
770       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
771   bool EmitDeclareReductionInit =
772       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
773   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
774                        EmitDeclareReductionInit,
775                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
776                                                 : PrivateVD->getInit(),
777                        DRD, SharedLVal.getAddress(CGF));
778 }
779 
780 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
781                                    ArrayRef<const Expr *> Origs,
782                                    ArrayRef<const Expr *> Privates,
783                                    ArrayRef<const Expr *> ReductionOps) {
784   ClausesData.reserve(Shareds.size());
785   SharedAddresses.reserve(Shareds.size());
786   Sizes.reserve(Shareds.size());
787   BaseDecls.reserve(Shareds.size());
788   const auto *IOrig = Origs.begin();
789   const auto *IPriv = Privates.begin();
790   const auto *IRed = ReductionOps.begin();
791   for (const Expr *Ref : Shareds) {
792     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
793     std::advance(IOrig, 1);
794     std::advance(IPriv, 1);
795     std::advance(IRed, 1);
796   }
797 }
798 
799 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
800   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
801          "Number of generated lvalues must be exactly N.");
802   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
803   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
804   SharedAddresses.emplace_back(First, Second);
805   if (ClausesData[N].Shared == ClausesData[N].Ref) {
806     OrigAddresses.emplace_back(First, Second);
807   } else {
808     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
809     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
810     OrigAddresses.emplace_back(First, Second);
811   }
812 }
813 
814 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
815   const auto *PrivateVD =
816       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
817   QualType PrivateType = PrivateVD->getType();
818   bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
819   if (!PrivateType->isVariablyModifiedType()) {
820     Sizes.emplace_back(
821         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
822         nullptr);
823     return;
824   }
825   llvm::Value *Size;
826   llvm::Value *SizeInChars;
827   auto *ElemType =
828       cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
829           ->getElementType();
830   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
831   if (AsArraySection) {
832     Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
833                                      OrigAddresses[N].first.getPointer(CGF));
834     Size = CGF.Builder.CreateNUWAdd(
835         Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
836     SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
837   } else {
838     SizeInChars =
839         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
840     Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
841   }
842   Sizes.emplace_back(SizeInChars, Size);
843   CodeGenFunction::OpaqueValueMapping OpaqueMap(
844       CGF,
845       cast<OpaqueValueExpr>(
846           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
847       RValue::get(Size));
848   CGF.EmitVariablyModifiedType(PrivateType);
849 }
850 
851 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
852                                          llvm::Value *Size) {
853   const auto *PrivateVD =
854       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
855   QualType PrivateType = PrivateVD->getType();
856   if (!PrivateType->isVariablyModifiedType()) {
857     assert(!Size && !Sizes[N].second &&
858            "Size should be nullptr for non-variably modified reduction "
859            "items.");
860     return;
861   }
862   CodeGenFunction::OpaqueValueMapping OpaqueMap(
863       CGF,
864       cast<OpaqueValueExpr>(
865           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
866       RValue::get(Size));
867   CGF.EmitVariablyModifiedType(PrivateType);
868 }
869 
870 void ReductionCodeGen::emitInitialization(
871     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
872     llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
873   assert(SharedAddresses.size() > N && "No variable was generated");
874   const auto *PrivateVD =
875       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
876   const OMPDeclareReductionDecl *DRD =
877       getReductionInit(ClausesData[N].ReductionOp);
878   QualType PrivateType = PrivateVD->getType();
879   PrivateAddr = CGF.Builder.CreateElementBitCast(
880       PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
881   QualType SharedType = SharedAddresses[N].first.getType();
882   SharedLVal = CGF.MakeAddrLValue(
883       CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
884                                        CGF.ConvertTypeForMem(SharedType)),
885       SharedType, SharedAddresses[N].first.getBaseInfo(),
886       CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
887   if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
888     if (DRD && DRD->getInitializer())
889       (void)DefaultInit(CGF);
890     emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
891   } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
892     (void)DefaultInit(CGF);
893     emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
894                                      PrivateAddr, SharedLVal.getAddress(CGF),
895                                      SharedLVal.getType());
896   } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
897              !CGF.isTrivialInitializer(PrivateVD->getInit())) {
898     CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
899                          PrivateVD->getType().getQualifiers(),
900                          /*IsInitializer=*/false);
901   }
902 }
903 
904 bool ReductionCodeGen::needCleanups(unsigned N) {
905   const auto *PrivateVD =
906       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
907   QualType PrivateType = PrivateVD->getType();
908   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
909   return DTorKind != QualType::DK_none;
910 }
911 
912 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
913                                     Address PrivateAddr) {
914   const auto *PrivateVD =
915       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
916   QualType PrivateType = PrivateVD->getType();
917   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
918   if (needCleanups(N)) {
919     PrivateAddr = CGF.Builder.CreateElementBitCast(
920         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
921     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
922   }
923 }
924 
925 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
926                           LValue BaseLV) {
927   BaseTy = BaseTy.getNonReferenceType();
928   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
929          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
930     if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
931       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
932     } else {
933       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
934       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
935     }
936     BaseTy = BaseTy->getPointeeType();
937   }
938   return CGF.MakeAddrLValue(
939       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
940                                        CGF.ConvertTypeForMem(ElTy)),
941       BaseLV.getType(), BaseLV.getBaseInfo(),
942       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
943 }
944 
/// Wraps \p Addr so that it can stand in for a base variable of type
/// \p BaseTy.
///
/// For every pointer/reference level of \p BaseTy (after stripping a top-level
/// reference) that does not already match \p ElTy, a memory temporary of that
/// level's type is created and the previous level's temporary is made to point
/// at it. \p Addr is cast to the element type of the innermost temporary and
/// stored there, and the outermost temporary is returned. When no level needs
/// wrapping, \p Addr is cast to \p BaseLVType and returned with alignment
/// \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  // Tmp: temporary created in the current iteration (innermost after the loop).
  // TopTmp: temporary created in the previous iteration.
  // MostTopTmp: temporary created in the first iteration (outermost).
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    // Chain the previous level to the new temporary, or remember the first one.
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  // Cast to what the innermost temporary stores, or to the base lvalue type if
  // no temporary was created.
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
972 
973 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
974   const VarDecl *OrigVD = nullptr;
975   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
976     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
977     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
978       Base = TempOASE->getBase()->IgnoreParenImpCasts();
979     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
980       Base = TempASE->getBase()->IgnoreParenImpCasts();
981     DE = cast<DeclRefExpr>(Base);
982     OrigVD = cast<VarDecl>(DE->getDecl());
983   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
984     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
985     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
986       Base = TempASE->getBase()->IgnoreParenImpCasts();
987     DE = cast<DeclRefExpr>(Base);
988     OrigVD = cast<VarDecl>(DE->getDecl());
989   }
990   return OrigVD;
991 }
992 
993 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
994                                                Address PrivateAddr) {
995   const DeclRefExpr *DE;
996   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
997     BaseDecls.emplace_back(OrigVD);
998     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
999     LValue BaseLValue =
1000         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1001                     OriginalBaseLValue);
1002     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1003         BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
1004     llvm::Value *PrivatePointer =
1005         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1006             PrivateAddr.getPointer(),
1007             SharedAddresses[N].first.getAddress(CGF).getType());
1008     llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1009     return castToBase(CGF, OrigVD->getType(),
1010                       SharedAddresses[N].first.getType(),
1011                       OriginalBaseLValue.getAddress(CGF).getType(),
1012                       OriginalBaseLValue.getAlignment(), Ptr);
1013   }
1014   BaseDecls.emplace_back(
1015       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1016   return PrivateAddr;
1017 }
1018 
1019 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1020   const OMPDeclareReductionDecl *DRD =
1021       getReductionInit(ClausesData[N].ReductionOp);
1022   return DRD && DRD->getInitializer();
1023 }
1024 
1025 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1026   return CGF.EmitLoadOfPointerLValue(
1027       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1028       getThreadIDVariable()->getType()->castAs<PointerType>());
1029 }
1030 
1031 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1032   if (!CGF.HaveInsertPoint())
1033     return;
1034   // 1.2.2 OpenMP Language Terminology
1035   // Structured block - An executable statement with a single entry at the
1036   // top and a single exit at the bottom.
1037   // The point of exit cannot be a branch out of the structured block.
1038   // longjmp() and throw() must not violate the entry/exit criteria.
1039   CGF.EHStack.pushTerminate();
1040   CodeGen(CGF);
1041   CGF.EHStack.popTerminate();
1042 }
1043 
1044 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1045     CodeGenFunction &CGF) {
1046   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1047                             getThreadIDVariable()->getType(),
1048                             AlignmentSource::Decl);
1049 }
1050 
1051 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1052                                        QualType FieldTy) {
1053   auto *Field = FieldDecl::Create(
1054       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1055       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1056       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1057   Field->setAccess(AS_public);
1058   DC->addDecl(Field);
1059   return Field;
1060 }
1061 
/// Constructs the OpenMP runtime code generator for module \p CGM.
///
/// \param FirstSeparator Stored as the separator emitted before the first part
///                       of names built by getName().
/// \param Separator      Stored as the separator emitted before every
///                       subsequent part of names built by getName().
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // The critical name type is an array of eight 32-bit integers.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}
1072 
1073 void CGOpenMPRuntime::clear() {
1074   InternalVars.clear();
1075   // Clean non-target variable declarations possibly used only in debug info.
1076   for (const auto &Data : EmittedNonTargetVariables) {
1077     if (!Data.getValue().pointsToAliveValue())
1078       continue;
1079     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1080     if (!GV)
1081       continue;
1082     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1083       continue;
1084     GV->eraseFromParent();
1085   }
1086 }
1087 
1088 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1089   SmallString<128> Buffer;
1090   llvm::raw_svector_ostream OS(Buffer);
1091   StringRef Sep = FirstSeparator;
1092   for (StringRef Part : Parts) {
1093     OS << Sep << Part;
1094     Sep = Separator;
1095   }
1096   return std::string(OS.str());
1097 }
1098 
/// Emits an internal helper function for a user-defined reduction: either its
/// combiner or its initializer.
///
/// \param Ty                  Type the helper operates on; both parameters are
///                            restrict-qualified pointers to it.
/// \param CombinerInitializer Expression emitted (result ignored) as the
///                            function body; may be null.
/// \param In                  Variable mapped to the pointee of the second
///                            ("in") parameter.
/// \param Out                 Variable mapped to the pointee of the first
///                            ("out") parameter.
/// \param IsCombiner          Selects the "omp_combiner" name; otherwise the
///                            "omp_initializer" name is used and a non-trivial
///                            initializer of \p Out is emitted first.
/// \returns The newly created function.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  // The "out" parameter comes first, followed by the "in" parameter.
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // When optimizing, replace noinline/optnone with alwaysinline.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // Initializer helpers first run the non-trivial initializer of the "out"
  // variable, if it has one.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1155 
1156 void CGOpenMPRuntime::emitUserDefinedReduction(
1157     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1158   if (UDRMap.count(D) > 0)
1159     return;
1160   llvm::Function *Combiner = emitCombinerOrInitializer(
1161       CGM, D->getType(), D->getCombiner(),
1162       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1163       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1164       /*IsCombiner=*/true);
1165   llvm::Function *Initializer = nullptr;
1166   if (const Expr *Init = D->getInitializer()) {
1167     Initializer = emitCombinerOrInitializer(
1168         CGM, D->getType(),
1169         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1170                                                                      : nullptr,
1171         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1172         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1173         /*IsCombiner=*/false);
1174   }
1175   UDRMap.try_emplace(D, Combiner, Initializer);
1176   if (CGF) {
1177     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1178     Decls.second.push_back(D);
1179   }
1180 }
1181 
1182 std::pair<llvm::Function *, llvm::Function *>
1183 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1184   auto I = UDRMap.find(D);
1185   if (I != UDRMap.end())
1186     return I->second;
1187   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1188   return UDRMap.lookup(D);
1189 }
1190 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
//
// The constructor pushes a finalization callback for an OMPD_parallel region
// onto the builder and the destructor pops it again. Both are no-ops when the
// builder pointer is null.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      // Emit at IP and restore the previous insert point when done.
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI(
        {FiniCB, OMPD_parallel, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  // Builder the callback was pushed on; may be null.
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1236 
1237 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1238     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1239     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1240     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1241   assert(ThreadIDVar->getType()->isPointerType() &&
1242          "thread id variable must be of type kmp_int32 *");
1243   CodeGenFunction CGF(CGM, true);
1244   bool HasCancel = false;
1245   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1246     HasCancel = OPD->hasCancel();
1247   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1248     HasCancel = OPD->hasCancel();
1249   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1250     HasCancel = OPSD->hasCancel();
1251   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1252     HasCancel = OPFD->hasCancel();
1253   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1254     HasCancel = OPFD->hasCancel();
1255   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1256     HasCancel = OPFD->hasCancel();
1257   else if (const auto *OPFD =
1258                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1259     HasCancel = OPFD->hasCancel();
1260   else if (const auto *OPFD =
1261                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1262     HasCancel = OPFD->hasCancel();
1263 
1264   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1265   //       parallel region to make cancellation barriers work properly.
1266   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1267   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel);
1268   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1269                                     HasCancel, OutlinedHelperName);
1270   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1271   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1272 }
1273 
1274 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1275     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1276     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1277   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1278   return emitParallelOrTeamsOutlinedFunction(
1279       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1280 }
1281 
1282 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1283     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1284     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1285   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1286   return emitParallelOrTeamsOutlinedFunction(
1287       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1288 }
1289 
1290 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1291     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1292     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1293     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1294     bool Tied, unsigned &NumberOfParts) {
1295   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1296                                               PrePostActionTy &) {
1297     llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1298     llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1299     llvm::Value *TaskArgs[] = {
1300         UpLoc, ThreadID,
1301         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1302                                     TaskTVar->getType()->castAs<PointerType>())
1303             .getPointer(CGF)};
1304     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1305                             CGM.getModule(), OMPRTL___kmpc_omp_task),
1306                         TaskArgs);
1307   };
1308   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1309                                                             UntiedCodeGen);
1310   CodeGen.setAction(Action);
1311   assert(!ThreadIDVar->getType()->isPointerType() &&
1312          "thread id variable must be of type kmp_int32 for tasks");
1313   const OpenMPDirectiveKind Region =
1314       isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1315                                                       : OMPD_task;
1316   const CapturedStmt *CS = D.getCapturedStmt(Region);
1317   bool HasCancel = false;
1318   if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1319     HasCancel = TD->hasCancel();
1320   else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1321     HasCancel = TD->hasCancel();
1322   else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1323     HasCancel = TD->hasCancel();
1324   else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1325     HasCancel = TD->hasCancel();
1326 
1327   CodeGenFunction CGF(CGM, true);
1328   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1329                                         InnermostKind, HasCancel, Action);
1330   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1331   llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1332   if (!Tied)
1333     NumberOfParts = Action.getNumberOfParts();
1334   return Res;
1335 }
1336 
1337 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1338                              const RecordDecl *RD, const CGRecordLayout &RL,
1339                              ArrayRef<llvm::Constant *> Data) {
1340   llvm::StructType *StructTy = RL.getLLVMType();
1341   unsigned PrevIdx = 0;
1342   ConstantInitBuilder CIBuilder(CGM);
1343   auto DI = Data.begin();
1344   for (const FieldDecl *FD : RD->fields()) {
1345     unsigned Idx = RL.getLLVMFieldNo(FD);
1346     // Fill the alignment.
1347     for (unsigned I = PrevIdx; I < Idx; ++I)
1348       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1349     PrevIdx = Idx + 1;
1350     Fields.add(*DI);
1351     ++DI;
1352   }
1353 }
1354 
1355 template <class... As>
1356 static llvm::GlobalVariable *
1357 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1358                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1359                    As &&... Args) {
1360   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1361   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1362   ConstantInitBuilder CIBuilder(CGM);
1363   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1364   buildStructValue(Fields, CGM, RD, RL, Data);
1365   return Fields.finishAndCreateGlobal(
1366       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1367       std::forward<As>(Args)...);
1368 }
1369 
1370 template <typename T>
1371 static void
1372 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1373                                          ArrayRef<llvm::Constant *> Data,
1374                                          T &Parent) {
1375   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1376   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1377   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1378   buildStructValue(Fields, CGM, RD, RL, Data);
1379   Fields.finishAndAddTo(Parent);
1380 }
1381 
1382 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1383                                              bool AtCurrentPoint) {
1384   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1385   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1386 
1387   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1388   if (AtCurrentPoint) {
1389     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1390         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1391   } else {
1392     Elem.second.ServiceInsertPt =
1393         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1394     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1395   }
1396 }
1397 
1398 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1399   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1400   if (Elem.second.ServiceInsertPt) {
1401     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1402     Elem.second.ServiceInsertPt = nullptr;
1403     Ptr->eraseFromParent();
1404   }
1405 }
1406 
1407 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1408                                                   SourceLocation Loc,
1409                                                   SmallString<128> &Buffer) {
1410   llvm::raw_svector_ostream OS(Buffer);
1411   // Build debug location
1412   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1413   OS << ";" << PLoc.getFilename() << ";";
1414   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1415     OS << FD->getQualifiedNameAsString();
1416   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1417   return OS.str();
1418 }
1419 
1420 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1421                                                  SourceLocation Loc,
1422                                                  unsigned Flags) {
1423   llvm::Constant *SrcLocStr;
1424   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1425       Loc.isInvalid()) {
1426     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
1427   } else {
1428     std::string FunctionName = "";
1429     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1430       FunctionName = FD->getQualifiedNameAsString();
1431     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1432     const char *FileName = PLoc.getFilename();
1433     unsigned Line = PLoc.getLine();
1434     unsigned Column = PLoc.getColumn();
1435     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
1436                                                 Line, Column);
1437   }
1438   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1439   return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
1440                                      Reserved2Flags);
1441 }
1442 
/// Returns the OpenMP global thread id to use at the current point of the
/// function being emitted by \p CGF, emitting code to obtain it if needed.
///
/// When the OpenMPIRBuilder language option is set the request is forwarded to
/// OMPBuilder. Otherwise the value comes, in this order, from the per-function
/// cache in OpenMPLocThreadIDMap, from a load of the region's thread id
/// variable, or from a call to __kmpc_global_thread_num emitted at the
/// function's service insert point (the call result is then cached).
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Load from the thread id variable only if at least one of these holds:
      // no landing pad is required, (C++) exceptions are disabled, we are
      // emitting into the block holding the alloca insert point, or the
      // variable's pointer is not an instruction or is defined in that block
      // or in the current insert block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call
  // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  // Lazily create the service insert point the call is emitted at.
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // The guard restores the builder's insert point when this function returns.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1510 
1511 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1512   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1513   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1514     clearLocThreadIdInsertPt(CGF);
1515     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1516   }
1517   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1518     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1519       UDRMap.erase(D);
1520     FunctionUDRMap.erase(CGF.CurFn);
1521   }
1522   auto I = FunctionUDMMap.find(CGF.CurFn);
1523   if (I != FunctionUDMMap.end()) {
1524     for(const auto *D : I->second)
1525       UDMMap.erase(D);
1526     FunctionUDMMap.erase(I);
1527   }
1528   LastprivateConditionalToTypes.erase(CGF.CurFn);
1529   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1530 }
1531 
1532 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1533   return OMPBuilder.IdentPtr;
1534 }
1535 
1536 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1537   if (!Kmpc_MicroTy) {
1538     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1539     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1540                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1541     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1542   }
1543   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1544 }
1545 
1546 llvm::FunctionCallee
1547 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1548   assert((IVSize == 32 || IVSize == 64) &&
1549          "IV size is not compatible with the omp runtime");
1550   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1551                                             : "__kmpc_for_static_init_4u")
1552                                 : (IVSigned ? "__kmpc_for_static_init_8"
1553                                             : "__kmpc_for_static_init_8u");
1554   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1555   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1556   llvm::Type *TypeParams[] = {
1557     getIdentTyPointerTy(),                     // loc
1558     CGM.Int32Ty,                               // tid
1559     CGM.Int32Ty,                               // schedtype
1560     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1561     PtrTy,                                     // p_lower
1562     PtrTy,                                     // p_upper
1563     PtrTy,                                     // p_stride
1564     ITy,                                       // incr
1565     ITy                                        // chunk
1566   };
1567   auto *FnTy =
1568       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1569   return CGM.CreateRuntimeFunction(FnTy, Name);
1570 }
1571 
1572 llvm::FunctionCallee
1573 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1574   assert((IVSize == 32 || IVSize == 64) &&
1575          "IV size is not compatible with the omp runtime");
1576   StringRef Name =
1577       IVSize == 32
1578           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1579           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1580   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1581   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1582                                CGM.Int32Ty,           // tid
1583                                CGM.Int32Ty,           // schedtype
1584                                ITy,                   // lower
1585                                ITy,                   // upper
1586                                ITy,                   // stride
1587                                ITy                    // chunk
1588   };
1589   auto *FnTy =
1590       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1591   return CGM.CreateRuntimeFunction(FnTy, Name);
1592 }
1593 
1594 llvm::FunctionCallee
1595 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1596   assert((IVSize == 32 || IVSize == 64) &&
1597          "IV size is not compatible with the omp runtime");
1598   StringRef Name =
1599       IVSize == 32
1600           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1601           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1602   llvm::Type *TypeParams[] = {
1603       getIdentTyPointerTy(), // loc
1604       CGM.Int32Ty,           // tid
1605   };
1606   auto *FnTy =
1607       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1608   return CGM.CreateRuntimeFunction(FnTy, Name);
1609 }
1610 
1611 llvm::FunctionCallee
1612 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1613   assert((IVSize == 32 || IVSize == 64) &&
1614          "IV size is not compatible with the omp runtime");
1615   StringRef Name =
1616       IVSize == 32
1617           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1618           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1619   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1620   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1621   llvm::Type *TypeParams[] = {
1622     getIdentTyPointerTy(),                     // loc
1623     CGM.Int32Ty,                               // tid
1624     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1625     PtrTy,                                     // p_lower
1626     PtrTy,                                     // p_upper
1627     PtrTy                                      // p_stride
1628   };
1629   auto *FnTy =
1630       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1631   return CGM.CreateRuntimeFunction(FnTy, Name);
1632 }
1633 
1634 /// Obtain information that uniquely identifies a target entry. This
1635 /// consists of the file and device IDs as well as line number associated with
1636 /// the relevant entry source location.
1637 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1638                                      unsigned &DeviceID, unsigned &FileID,
1639                                      unsigned &LineNum) {
1640   SourceManager &SM = C.getSourceManager();
1641 
1642   // The loc should be always valid and have a file ID (the user cannot use
1643   // #pragma directives in macros)
1644 
1645   assert(Loc.isValid() && "Source location is expected to be always valid.");
1646 
1647   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1648   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1649 
1650   llvm::sys::fs::UniqueID ID;
1651   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1652     SM.getDiagnostics().Report(diag::err_cannot_open_file)
1653         << PLoc.getFilename() << EC.message();
1654 
1655   DeviceID = ID.getDevice();
1656   FileID = ID.getFile();
1657   LineNum = PLoc.getLine();
1658 }
1659 
1660 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1661   if (CGM.getLangOpts().OpenMPSimd)
1662     return Address::invalid();
1663   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1664       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1665   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
1666               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1667                HasRequiresUnifiedSharedMemory))) {
1668     SmallString<64> PtrName;
1669     {
1670       llvm::raw_svector_ostream OS(PtrName);
1671       OS << CGM.getMangledName(GlobalDecl(VD));
1672       if (!VD->isExternallyVisible()) {
1673         unsigned DeviceID, FileID, Line;
1674         getTargetEntryUniqueInfo(CGM.getContext(),
1675                                  VD->getCanonicalDecl()->getBeginLoc(),
1676                                  DeviceID, FileID, Line);
1677         OS << llvm::format("_%x", FileID);
1678       }
1679       OS << "_decl_tgt_ref_ptr";
1680     }
1681     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
1682     if (!Ptr) {
1683       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
1684       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
1685                                         PtrName);
1686 
1687       auto *GV = cast<llvm::GlobalVariable>(Ptr);
1688       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
1689 
1690       if (!CGM.getLangOpts().OpenMPIsDevice)
1691         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
1692       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
1693     }
1694     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
1695   }
1696   return Address::invalid();
1697 }
1698 
1699 llvm::Constant *
1700 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1701   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1702          !CGM.getContext().getTargetInfo().isTLSSupported());
1703   // Lookup the entry, lazily creating it if necessary.
1704   std::string Suffix = getName({"cache", ""});
1705   return getOrCreateInternalVariable(
1706       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1707 }
1708 
1709 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1710                                                 const VarDecl *VD,
1711                                                 Address VDAddr,
1712                                                 SourceLocation Loc) {
1713   if (CGM.getLangOpts().OpenMPUseTLS &&
1714       CGM.getContext().getTargetInfo().isTLSSupported())
1715     return VDAddr;
1716 
1717   llvm::Type *VarTy = VDAddr.getElementType();
1718   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1719                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1720                                                        CGM.Int8PtrTy),
1721                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1722                          getOrCreateThreadPrivateCache(VD)};
1723   return Address(CGF.EmitRuntimeCall(
1724                      OMPBuilder.getOrCreateRuntimeFunction(
1725                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1726                      Args),
1727                  VDAddr.getAlignment());
1728 }
1729 
1730 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1731     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1732     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1733   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1734   // library.
1735   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1736   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1737                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1738                       OMPLoc);
1739   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1740   // to register constructor/destructor for variable.
1741   llvm::Value *Args[] = {
1742       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1743       Ctor, CopyCtor, Dtor};
1744   CGF.EmitRuntimeCall(
1745       OMPBuilder.getOrCreateRuntimeFunction(
1746           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1747       Args);
1748 }
1749 
/// Emits the constructor/destructor helper functions for the threadprivate
/// variable \p VD (stored at \p VDAddr) and the code that registers them with
/// the runtime.
///
/// Returns nullptr when threadprivate variables are implemented through TLS,
/// when \p VD has no definition or was already processed, when neither a
/// constructor nor a destructor helper is needed, or when the registration
/// code was emitted into the caller-provided \p CGF. When \p CGF is null, a
/// new "__omp_threadprivate_init_" function performing the registration is
/// created and returned.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // The insertion into ThreadPrivateWithDefinition succeeds only the first
  // time a given mangled name is seen, so each variable is handled once.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // Single unnamed 'void *' parameter: the copy to initialize.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Reload the parameter and store it as the function's return value.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // Single unnamed 'void *' parameter: the copy to destroy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Whichever of the constructor/destructor helpers was not generated is
    // passed as a null function pointer of the matching type.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    // Without a caller-provided function, wrap the registration in a new
    // nullary init function and hand that function back to the caller.
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration directly into the caller's function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1869 
/// Emits and registers the offload entries for the constructor and destructor
/// of the declare target variable \p VD whose global is \p Addr.
///
/// Returns false when no offload target triples are specified and this is not
/// a device compilation. On every other path the result is whether this is a
/// device compilation (LangOpts.OpenMPIsDevice); that includes the early exits
/// for variables that are not declare target, are 'link' entries, are 'to'
/// entries under unified shared memory, or were already processed.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Each variable (keyed by mangled name) is processed only once.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    // NOTE(review): "__omp_offloading_" followed by the "_%x" format yields
    // two consecutive underscores before the device ID - confirm that this is
    // the intended spelling before touching it.
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the global Addr of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host no function is generated: a private constant i8 global
      // serves both as the entry address and as its ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the global Addr of
      // the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // On the host no function is generated: a private constant i8 global
      // serves both as the entry address and as its ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
1984 
1985 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1986                                                           QualType VarType,
1987                                                           StringRef Name) {
1988   std::string Suffix = getName({"artificial", ""});
1989   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1990   llvm::Value *GAddr =
1991       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
1992   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1993       CGM.getTarget().isTLSSupported()) {
1994     cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
1995     return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
1996   }
1997   std::string CacheSuffix = getName({"cache", ""});
1998   llvm::Value *Args[] = {
1999       emitUpdateLocation(CGF, SourceLocation()),
2000       getThreadID(CGF, SourceLocation()),
2001       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2002       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2003                                 /*isSigned=*/false),
2004       getOrCreateInternalVariable(
2005           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2006   return Address(
2007       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2008           CGF.EmitRuntimeCall(
2009               OMPBuilder.getOrCreateRuntimeFunction(
2010                   CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
2011               Args),
2012           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2013       CGM.getContext().getTypeAlignInChars(VarType));
2014 }
2015 
2016 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2017                                    const RegionCodeGenTy &ThenGen,
2018                                    const RegionCodeGenTy &ElseGen) {
2019   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2020 
2021   // If the condition constant folds and can be elided, try to avoid emitting
2022   // the condition and the dead arm of the if/else.
2023   bool CondConstant;
2024   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2025     if (CondConstant)
2026       ThenGen(CGF);
2027     else
2028       ElseGen(CGF);
2029     return;
2030   }
2031 
2032   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2033   // emit the conditional branch.
2034   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2035   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2036   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2037   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2038 
2039   // Emit the 'then' code.
2040   CGF.EmitBlock(ThenBlock);
2041   ThenGen(CGF);
2042   CGF.EmitBranch(ContBlock);
2043   // Emit the 'else' code if present.
2044   // There is no need to emit line number for unconditional branch.
2045   (void)ApplyDebugLocation::CreateEmpty(CGF);
2046   CGF.EmitBlock(ElseBlock);
2047   ElseGen(CGF);
2048   // There is no need to emit line number for unconditional branch.
2049   (void)ApplyDebugLocation::CreateEmpty(CGF);
2050   CGF.EmitBranch(ContBlock);
2051   // Emit the continuation block for code after the if.
2052   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2053 }
2054 
2055 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2056                                        llvm::Function *OutlinedFn,
2057                                        ArrayRef<llvm::Value *> CapturedVars,
2058                                        const Expr *IfCond) {
2059   if (!CGF.HaveInsertPoint())
2060     return;
2061   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2062   auto &M = CGM.getModule();
2063   auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2064                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2065     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2066     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2067     llvm::Value *Args[] = {
2068         RTLoc,
2069         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2070         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2071     llvm::SmallVector<llvm::Value *, 16> RealArgs;
2072     RealArgs.append(std::begin(Args), std::end(Args));
2073     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2074 
2075     llvm::FunctionCallee RTLFn =
2076         OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2077     CGF.EmitRuntimeCall(RTLFn, RealArgs);
2078   };
2079   auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2080                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2081     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2082     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2083     // Build calls:
2084     // __kmpc_serialized_parallel(&Loc, GTid);
2085     llvm::Value *Args[] = {RTLoc, ThreadID};
2086     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2087                             M, OMPRTL___kmpc_serialized_parallel),
2088                         Args);
2089 
2090     // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2091     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2092     Address ZeroAddrBound =
2093         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2094                                          /*Name=*/".bound.zero.addr");
2095     CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
2096     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2097     // ThreadId for serialized parallels is 0.
2098     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2099     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2100     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2101     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2102 
2103     // __kmpc_end_serialized_parallel(&Loc, GTid);
2104     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2105     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2106                             M, OMPRTL___kmpc_end_serialized_parallel),
2107                         EndArgs);
2108   };
2109   if (IfCond) {
2110     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2111   } else {
2112     RegionCodeGenTy ThenRCG(ThenGen);
2113     ThenRCG(CGF);
2114   }
2115 }
2116 
2117 // If we're inside an (outlined) parallel region, use the region info's
2118 // thread-ID variable (it is passed in a first argument of the outlined function
2119 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2120 // regular serial code region, get thread ID by calling kmp_int32
2121 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2122 // return the address of that temp.
2123 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2124                                              SourceLocation Loc) {
2125   if (auto *OMPRegionInfo =
2126           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2127     if (OMPRegionInfo->getThreadIDVariable())
2128       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2129 
2130   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2131   QualType Int32Ty =
2132       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2133   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2134   CGF.EmitStoreOfScalar(ThreadID,
2135                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2136 
2137   return ThreadIDTemp;
2138 }
2139 
2140 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2141     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2142   SmallString<256> Buffer;
2143   llvm::raw_svector_ostream Out(Buffer);
2144   Out << Name;
2145   StringRef RuntimeName = Out.str();
2146   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2147   if (Elem.second) {
2148     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2149            "OMP internal variable has different type than requested");
2150     return &*Elem.second;
2151   }
2152 
2153   return Elem.second = new llvm::GlobalVariable(
2154              CGM.getModule(), Ty, /*IsConstant*/ false,
2155              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2156              Elem.first(), /*InsertBefore=*/nullptr,
2157              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2158 }
2159 
2160 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2161   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2162   std::string Name = getName({Prefix, "var"});
2163   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2164 }
2165 
2166 namespace {
2167 /// Common pre(post)-action for different OpenMP constructs.
2168 class CommonActionTy final : public PrePostActionTy {
2169   llvm::FunctionCallee EnterCallee;
2170   ArrayRef<llvm::Value *> EnterArgs;
2171   llvm::FunctionCallee ExitCallee;
2172   ArrayRef<llvm::Value *> ExitArgs;
2173   bool Conditional;
2174   llvm::BasicBlock *ContBlock = nullptr;
2175 
2176 public:
2177   CommonActionTy(llvm::FunctionCallee EnterCallee,
2178                  ArrayRef<llvm::Value *> EnterArgs,
2179                  llvm::FunctionCallee ExitCallee,
2180                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2181       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2182         ExitArgs(ExitArgs), Conditional(Conditional) {}
2183   void Enter(CodeGenFunction &CGF) override {
2184     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2185     if (Conditional) {
2186       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2187       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2188       ContBlock = CGF.createBasicBlock("omp_if.end");
2189       // Generate the branch (If-stmt)
2190       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2191       CGF.EmitBlock(ThenBlock);
2192     }
2193   }
2194   void Done(CodeGenFunction &CGF) {
2195     // Emit the rest of blocks/branches
2196     CGF.EmitBranch(ContBlock);
2197     CGF.EmitBlock(ContBlock, true);
2198   }
2199   void Exit(CodeGenFunction &CGF) override {
2200     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2201   }
2202 };
2203 } // anonymous namespace
2204 
2205 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2206                                          StringRef CriticalName,
2207                                          const RegionCodeGenTy &CriticalOpGen,
2208                                          SourceLocation Loc, const Expr *Hint) {
2209   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2210   // CriticalOpGen();
2211   // __kmpc_end_critical(ident_t *, gtid, Lock);
2212   // Prepare arguments and build a call to __kmpc_critical
2213   if (!CGF.HaveInsertPoint())
2214     return;
2215   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2216                          getCriticalRegionLock(CriticalName)};
2217   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2218                                                 std::end(Args));
2219   if (Hint) {
2220     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2221         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2222   }
2223   CommonActionTy Action(
2224       OMPBuilder.getOrCreateRuntimeFunction(
2225           CGM.getModule(),
2226           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2227       EnterArgs,
2228       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2229                                             OMPRTL___kmpc_end_critical),
2230       Args);
2231   CriticalOpGen.setAction(Action);
2232   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2233 }
2234 
2235 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2236                                        const RegionCodeGenTy &MasterOpGen,
2237                                        SourceLocation Loc) {
2238   if (!CGF.HaveInsertPoint())
2239     return;
2240   // if(__kmpc_master(ident_t *, gtid)) {
2241   //   MasterOpGen();
2242   //   __kmpc_end_master(ident_t *, gtid);
2243   // }
2244   // Prepare arguments and build a call to __kmpc_master
2245   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2246   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2247                             CGM.getModule(), OMPRTL___kmpc_master),
2248                         Args,
2249                         OMPBuilder.getOrCreateRuntimeFunction(
2250                             CGM.getModule(), OMPRTL___kmpc_end_master),
2251                         Args,
2252                         /*Conditional=*/true);
2253   MasterOpGen.setAction(Action);
2254   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2255   Action.Done(CGF);
2256 }
2257 
2258 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2259                                         SourceLocation Loc) {
2260   if (!CGF.HaveInsertPoint())
2261     return;
2262   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2263     OMPBuilder.createTaskyield(CGF.Builder);
2264   } else {
2265     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2266     llvm::Value *Args[] = {
2267         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2268         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2269     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2270                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2271                         Args);
2272   }
2273 
2274   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2275     Region->emitUntiedSwitch(CGF);
2276 }
2277 
2278 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2279                                           const RegionCodeGenTy &TaskgroupOpGen,
2280                                           SourceLocation Loc) {
2281   if (!CGF.HaveInsertPoint())
2282     return;
2283   // __kmpc_taskgroup(ident_t *, gtid);
2284   // TaskgroupOpGen();
2285   // __kmpc_end_taskgroup(ident_t *, gtid);
2286   // Prepare arguments and build a call to __kmpc_taskgroup
2287   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2288   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2289                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2290                         Args,
2291                         OMPBuilder.getOrCreateRuntimeFunction(
2292                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2293                         Args);
2294   TaskgroupOpGen.setAction(Action);
2295   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2296 }
2297 
2298 /// Given an array of pointers to variables, project the address of a
2299 /// given variable.
2300 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2301                                       unsigned Index, const VarDecl *Var) {
2302   // Pull out the pointer to the variable.
2303   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2304   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2305 
2306   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2307   Addr = CGF.Builder.CreateElementBitCast(
2308       Addr, CGF.ConvertTypeForMem(Var->getType()));
2309   return Addr;
2310 }
2311 
2312 static llvm::Value *emitCopyprivateCopyFunction(
2313     CodeGenModule &CGM, llvm::Type *ArgsType,
2314     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2315     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2316     SourceLocation Loc) {
2317   ASTContext &C = CGM.getContext();
2318   // void copy_func(void *LHSArg, void *RHSArg);
2319   FunctionArgList Args;
2320   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2321                            ImplicitParamDecl::Other);
2322   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2323                            ImplicitParamDecl::Other);
2324   Args.push_back(&LHSArg);
2325   Args.push_back(&RHSArg);
2326   const auto &CGFI =
2327       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2328   std::string Name =
2329       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2330   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2331                                     llvm::GlobalValue::InternalLinkage, Name,
2332                                     &CGM.getModule());
2333   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2334   Fn->setDoesNotRecurse();
2335   CodeGenFunction CGF(CGM);
2336   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2337   // Dest = (void*[n])(LHSArg);
2338   // Src = (void*[n])(RHSArg);
2339   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2340       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2341       ArgsType), CGF.getPointerAlign());
2342   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2343       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2344       ArgsType), CGF.getPointerAlign());
2345   // *(Type0*)Dst[0] = *(Type0*)Src[0];
2346   // *(Type1*)Dst[1] = *(Type1*)Src[1];
2347   // ...
2348   // *(Typen*)Dst[n] = *(Typen*)Src[n];
2349   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2350     const auto *DestVar =
2351         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2352     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2353 
2354     const auto *SrcVar =
2355         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2356     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2357 
2358     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2359     QualType Type = VD->getType();
2360     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2361   }
2362   CGF.FinishFunction();
2363   return Fn;
2364 }
2365 
2366 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2367                                        const RegionCodeGenTy &SingleOpGen,
2368                                        SourceLocation Loc,
2369                                        ArrayRef<const Expr *> CopyprivateVars,
2370                                        ArrayRef<const Expr *> SrcExprs,
2371                                        ArrayRef<const Expr *> DstExprs,
2372                                        ArrayRef<const Expr *> AssignmentOps) {
2373   if (!CGF.HaveInsertPoint())
2374     return;
2375   assert(CopyprivateVars.size() == SrcExprs.size() &&
2376          CopyprivateVars.size() == DstExprs.size() &&
2377          CopyprivateVars.size() == AssignmentOps.size());
2378   ASTContext &C = CGM.getContext();
2379   // int32 did_it = 0;
2380   // if(__kmpc_single(ident_t *, gtid)) {
2381   //   SingleOpGen();
2382   //   __kmpc_end_single(ident_t *, gtid);
2383   //   did_it = 1;
2384   // }
2385   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2386   // <copy_func>, did_it);
2387 
2388   Address DidIt = Address::invalid();
2389   if (!CopyprivateVars.empty()) {
2390     // int32 did_it = 0;
2391     QualType KmpInt32Ty =
2392         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2393     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2394     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2395   }
2396   // Prepare arguments and build a call to __kmpc_single
2397   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2398   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2399                             CGM.getModule(), OMPRTL___kmpc_single),
2400                         Args,
2401                         OMPBuilder.getOrCreateRuntimeFunction(
2402                             CGM.getModule(), OMPRTL___kmpc_end_single),
2403                         Args,
2404                         /*Conditional=*/true);
2405   SingleOpGen.setAction(Action);
2406   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2407   if (DidIt.isValid()) {
2408     // did_it = 1;
2409     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2410   }
2411   Action.Done(CGF);
2412   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2413   // <copy_func>, did_it);
2414   if (DidIt.isValid()) {
2415     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2416     QualType CopyprivateArrayTy = C.getConstantArrayType(
2417         C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2418         /*IndexTypeQuals=*/0);
2419     // Create a list of all private variables for copyprivate.
2420     Address CopyprivateList =
2421         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2422     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2423       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2424       CGF.Builder.CreateStore(
2425           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2426               CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2427               CGF.VoidPtrTy),
2428           Elem);
2429     }
2430     // Build function that copies private values from single region to all other
2431     // threads in the corresponding parallel region.
2432     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2433         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
2434         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
2435     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2436     Address CL =
2437       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
2438                                                       CGF.VoidPtrTy);
2439     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2440     llvm::Value *Args[] = {
2441         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2442         getThreadID(CGF, Loc),        // i32 <gtid>
2443         BufSize,                      // size_t <buf_size>
2444         CL.getPointer(),              // void *<copyprivate list>
2445         CpyFn,                        // void (*) (void *, void *) <copy_func>
2446         DidItVal                      // i32 did_it
2447     };
2448     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2449                             CGM.getModule(), OMPRTL___kmpc_copyprivate),
2450                         Args);
2451   }
2452 }
2453 
2454 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2455                                         const RegionCodeGenTy &OrderedOpGen,
2456                                         SourceLocation Loc, bool IsThreads) {
2457   if (!CGF.HaveInsertPoint())
2458     return;
2459   // __kmpc_ordered(ident_t *, gtid);
2460   // OrderedOpGen();
2461   // __kmpc_end_ordered(ident_t *, gtid);
2462   // Prepare arguments and build a call to __kmpc_ordered
2463   if (IsThreads) {
2464     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2465     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2466                               CGM.getModule(), OMPRTL___kmpc_ordered),
2467                           Args,
2468                           OMPBuilder.getOrCreateRuntimeFunction(
2469                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2470                           Args);
2471     OrderedOpGen.setAction(Action);
2472     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2473     return;
2474   }
2475   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2476 }
2477 
2478 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2479   unsigned Flags;
2480   if (Kind == OMPD_for)
2481     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2482   else if (Kind == OMPD_sections)
2483     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2484   else if (Kind == OMPD_single)
2485     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2486   else if (Kind == OMPD_barrier)
2487     Flags = OMP_IDENT_BARRIER_EXPL;
2488   else
2489     Flags = OMP_IDENT_BARRIER_IMPL;
2490   return Flags;
2491 }
2492 
2493 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2494     CodeGenFunction &CGF, const OMPLoopDirective &S,
2495     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2496   // Check if the loop directive is actually a doacross loop directive. In this
2497   // case choose static, 1 schedule.
2498   if (llvm::any_of(
2499           S.getClausesOfKind<OMPOrderedClause>(),
2500           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2501     ScheduleKind = OMPC_SCHEDULE_static;
2502     // Chunk size is 1 in this case.
2503     llvm::APInt ChunkSize(32, 1);
2504     ChunkExpr = IntegerLiteral::Create(
2505         CGF.getContext(), ChunkSize,
2506         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2507         SourceLocation());
2508   }
2509 }
2510 
2511 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2512                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2513                                       bool ForceSimpleCall) {
2514   // Check if we should use the OMPBuilder
2515   auto *OMPRegionInfo =
2516       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2517   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2518     CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2519         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2520     return;
2521   }
2522 
2523   if (!CGF.HaveInsertPoint())
2524     return;
2525   // Build call __kmpc_cancel_barrier(loc, thread_id);
2526   // Build call __kmpc_barrier(loc, thread_id);
2527   unsigned Flags = getDefaultFlagsForBarriers(Kind);
2528   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2529   // thread_id);
2530   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2531                          getThreadID(CGF, Loc)};
2532   if (OMPRegionInfo) {
2533     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2534       llvm::Value *Result = CGF.EmitRuntimeCall(
2535           OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2536                                                 OMPRTL___kmpc_cancel_barrier),
2537           Args);
2538       if (EmitChecks) {
2539         // if (__kmpc_cancel_barrier()) {
2540         //   exit from construct;
2541         // }
2542         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2543         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2544         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2545         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2546         CGF.EmitBlock(ExitBB);
2547         //   exit from construct;
2548         CodeGenFunction::JumpDest CancelDestination =
2549             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2550         CGF.EmitBranchThroughCleanup(CancelDestination);
2551         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2552       }
2553       return;
2554     }
2555   }
2556   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2557                           CGM.getModule(), OMPRTL___kmpc_barrier),
2558                       Args);
2559 }
2560 
2561 /// Map the OpenMP loop schedule to the runtime enumeration.
2562 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2563                                           bool Chunked, bool Ordered) {
2564   switch (ScheduleKind) {
2565   case OMPC_SCHEDULE_static:
2566     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2567                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2568   case OMPC_SCHEDULE_dynamic:
2569     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2570   case OMPC_SCHEDULE_guided:
2571     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2572   case OMPC_SCHEDULE_runtime:
2573     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2574   case OMPC_SCHEDULE_auto:
2575     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2576   case OMPC_SCHEDULE_unknown:
2577     assert(!Chunked && "chunk was specified but schedule kind not known");
2578     return Ordered ? OMP_ord_static : OMP_sch_static;
2579   }
2580   llvm_unreachable("Unexpected runtime schedule");
2581 }
2582 
2583 /// Map the OpenMP distribute schedule to the runtime enumeration.
2584 static OpenMPSchedType
2585 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2586   // only static is allowed for dist_schedule
2587   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2588 }
2589 
2590 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2591                                          bool Chunked) const {
2592   OpenMPSchedType Schedule =
2593       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2594   return Schedule == OMP_sch_static;
2595 }
2596 
2597 bool CGOpenMPRuntime::isStaticNonchunked(
2598     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2599   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2600   return Schedule == OMP_dist_sch_static;
2601 }
2602 
2603 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2604                                       bool Chunked) const {
2605   OpenMPSchedType Schedule =
2606       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2607   return Schedule == OMP_sch_static_chunked;
2608 }
2609 
2610 bool CGOpenMPRuntime::isStaticChunked(
2611     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2612   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2613   return Schedule == OMP_dist_sch_static_chunked;
2614 }
2615 
2616 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2617   OpenMPSchedType Schedule =
2618       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2619   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2620   return Schedule != OMP_sch_static;
2621 }
2622 
2623 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2624                                   OpenMPScheduleClauseModifier M1,
2625                                   OpenMPScheduleClauseModifier M2) {
2626   int Modifier = 0;
2627   switch (M1) {
2628   case OMPC_SCHEDULE_MODIFIER_monotonic:
2629     Modifier = OMP_sch_modifier_monotonic;
2630     break;
2631   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2632     Modifier = OMP_sch_modifier_nonmonotonic;
2633     break;
2634   case OMPC_SCHEDULE_MODIFIER_simd:
2635     if (Schedule == OMP_sch_static_chunked)
2636       Schedule = OMP_sch_static_balanced_chunked;
2637     break;
2638   case OMPC_SCHEDULE_MODIFIER_last:
2639   case OMPC_SCHEDULE_MODIFIER_unknown:
2640     break;
2641   }
2642   switch (M2) {
2643   case OMPC_SCHEDULE_MODIFIER_monotonic:
2644     Modifier = OMP_sch_modifier_monotonic;
2645     break;
2646   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2647     Modifier = OMP_sch_modifier_nonmonotonic;
2648     break;
2649   case OMPC_SCHEDULE_MODIFIER_simd:
2650     if (Schedule == OMP_sch_static_chunked)
2651       Schedule = OMP_sch_static_balanced_chunked;
2652     break;
2653   case OMPC_SCHEDULE_MODIFIER_last:
2654   case OMPC_SCHEDULE_MODIFIER_unknown:
2655     break;
2656   }
2657   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2658   // If the static schedule kind is specified or if the ordered clause is
2659   // specified, and if the nonmonotonic modifier is not specified, the effect is
2660   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2661   // modifier is specified, the effect is as if the nonmonotonic modifier is
2662   // specified.
2663   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2664     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2665           Schedule == OMP_sch_static_balanced_chunked ||
2666           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2667           Schedule == OMP_dist_sch_static_chunked ||
2668           Schedule == OMP_dist_sch_static))
2669       Modifier = OMP_sch_modifier_nonmonotonic;
2670   }
2671   return Schedule | Modifier;
2672 }
2673 
2674 void CGOpenMPRuntime::emitForDispatchInit(
2675     CodeGenFunction &CGF, SourceLocation Loc,
2676     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2677     bool Ordered, const DispatchRTInput &DispatchValues) {
2678   if (!CGF.HaveInsertPoint())
2679     return;
2680   OpenMPSchedType Schedule = getRuntimeSchedule(
2681       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2682   assert(Ordered ||
2683          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2684           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2685           Schedule != OMP_sch_static_balanced_chunked));
2686   // Call __kmpc_dispatch_init(
2687   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2688   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2689   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2690 
2691   // If the Chunk was not specified in the clause - use default value 1.
2692   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2693                                             : CGF.Builder.getIntN(IVSize, 1);
2694   llvm::Value *Args[] = {
2695       emitUpdateLocation(CGF, Loc),
2696       getThreadID(CGF, Loc),
2697       CGF.Builder.getInt32(addMonoNonMonoModifier(
2698           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2699       DispatchValues.LB,                                     // Lower
2700       DispatchValues.UB,                                     // Upper
2701       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2702       Chunk                                                  // Chunk
2703   };
2704   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2705 }
2706 
2707 static void emitForStaticInitCall(
2708     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2709     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2710     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2711     const CGOpenMPRuntime::StaticRTInput &Values) {
2712   if (!CGF.HaveInsertPoint())
2713     return;
2714 
2715   assert(!Values.Ordered);
2716   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2717          Schedule == OMP_sch_static_balanced_chunked ||
2718          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2719          Schedule == OMP_dist_sch_static ||
2720          Schedule == OMP_dist_sch_static_chunked);
2721 
2722   // Call __kmpc_for_static_init(
2723   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2724   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2725   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2726   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2727   llvm::Value *Chunk = Values.Chunk;
2728   if (Chunk == nullptr) {
2729     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2730             Schedule == OMP_dist_sch_static) &&
2731            "expected static non-chunked schedule");
2732     // If the Chunk was not specified in the clause - use default value 1.
2733     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2734   } else {
2735     assert((Schedule == OMP_sch_static_chunked ||
2736             Schedule == OMP_sch_static_balanced_chunked ||
2737             Schedule == OMP_ord_static_chunked ||
2738             Schedule == OMP_dist_sch_static_chunked) &&
2739            "expected static chunked schedule");
2740   }
2741   llvm::Value *Args[] = {
2742       UpdateLocation,
2743       ThreadId,
2744       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2745                                                   M2)), // Schedule type
2746       Values.IL.getPointer(),                           // &isLastIter
2747       Values.LB.getPointer(),                           // &LB
2748       Values.UB.getPointer(),                           // &UB
2749       Values.ST.getPointer(),                           // &Stride
2750       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2751       Chunk                                             // Chunk
2752   };
2753   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2754 }
2755 
2756 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2757                                         SourceLocation Loc,
2758                                         OpenMPDirectiveKind DKind,
2759                                         const OpenMPScheduleTy &ScheduleKind,
2760                                         const StaticRTInput &Values) {
2761   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2762       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2763   assert(isOpenMPWorksharingDirective(DKind) &&
2764          "Expected loop-based or sections-based directive.");
2765   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2766                                              isOpenMPLoopDirective(DKind)
2767                                                  ? OMP_IDENT_WORK_LOOP
2768                                                  : OMP_IDENT_WORK_SECTIONS);
2769   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2770   llvm::FunctionCallee StaticInitFunction =
2771       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2772   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2773   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2774                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2775 }
2776 
2777 void CGOpenMPRuntime::emitDistributeStaticInit(
2778     CodeGenFunction &CGF, SourceLocation Loc,
2779     OpenMPDistScheduleClauseKind SchedKind,
2780     const CGOpenMPRuntime::StaticRTInput &Values) {
2781   OpenMPSchedType ScheduleNum =
2782       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2783   llvm::Value *UpdatedLocation =
2784       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2785   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2786   llvm::FunctionCallee StaticInitFunction =
2787       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2788   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2789                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2790                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2791 }
2792 
2793 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2794                                           SourceLocation Loc,
2795                                           OpenMPDirectiveKind DKind) {
2796   if (!CGF.HaveInsertPoint())
2797     return;
2798   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2799   llvm::Value *Args[] = {
2800       emitUpdateLocation(CGF, Loc,
2801                          isOpenMPDistributeDirective(DKind)
2802                              ? OMP_IDENT_WORK_DISTRIBUTE
2803                              : isOpenMPLoopDirective(DKind)
2804                                    ? OMP_IDENT_WORK_LOOP
2805                                    : OMP_IDENT_WORK_SECTIONS),
2806       getThreadID(CGF, Loc)};
2807   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2808   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2809                           CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2810                       Args);
2811 }
2812 
2813 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2814                                                  SourceLocation Loc,
2815                                                  unsigned IVSize,
2816                                                  bool IVSigned) {
2817   if (!CGF.HaveInsertPoint())
2818     return;
2819   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2820   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2821   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2822 }
2823 
2824 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2825                                           SourceLocation Loc, unsigned IVSize,
2826                                           bool IVSigned, Address IL,
2827                                           Address LB, Address UB,
2828                                           Address ST) {
2829   // Call __kmpc_dispatch_next(
2830   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2831   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2832   //          kmp_int[32|64] *p_stride);
2833   llvm::Value *Args[] = {
2834       emitUpdateLocation(CGF, Loc),
2835       getThreadID(CGF, Loc),
2836       IL.getPointer(), // &isLastIter
2837       LB.getPointer(), // &Lower
2838       UB.getPointer(), // &Upper
2839       ST.getPointer()  // &Stride
2840   };
2841   llvm::Value *Call =
2842       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2843   return CGF.EmitScalarConversion(
2844       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2845       CGF.getContext().BoolTy, Loc);
2846 }
2847 
2848 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2849                                            llvm::Value *NumThreads,
2850                                            SourceLocation Loc) {
2851   if (!CGF.HaveInsertPoint())
2852     return;
2853   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2854   llvm::Value *Args[] = {
2855       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2856       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2857   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2858                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2859                       Args);
2860 }
2861 
2862 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2863                                          ProcBindKind ProcBind,
2864                                          SourceLocation Loc) {
2865   if (!CGF.HaveInsertPoint())
2866     return;
2867   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2868   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2869   llvm::Value *Args[] = {
2870       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2871       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2872   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2873                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2874                       Args);
2875 }
2876 
2877 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2878                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2879   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2880     OMPBuilder.createFlush(CGF.Builder);
2881   } else {
2882     if (!CGF.HaveInsertPoint())
2883       return;
2884     // Build call void __kmpc_flush(ident_t *loc)
2885     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2886                             CGM.getModule(), OMPRTL___kmpc_flush),
2887                         emitUpdateLocation(CGF, Loc));
2888   }
2889 }
2890 
namespace {
/// Indexes of fields for type kmp_task_t.
///
/// No enumerator has an explicit value, so the indexes are consecutive and
/// start at zero; the declaration order below is therefore significant.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2916 
2917 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2918   return OffloadEntriesTargetRegion.empty() &&
2919          OffloadEntriesDeviceGlobalVar.empty();
2920 }
2921 
2922 /// Initialize target region entry.
2923 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2924     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2925                                     StringRef ParentName, unsigned LineNum,
2926                                     unsigned Order) {
2927   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2928                                              "only required for the device "
2929                                              "code generation.");
2930   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2931       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2932                                    OMPTargetRegionEntryTargetRegion);
2933   ++OffloadingEntriesNum;
2934 }
2935 
2936 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2937     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2938                                   StringRef ParentName, unsigned LineNum,
2939                                   llvm::Constant *Addr, llvm::Constant *ID,
2940                                   OMPTargetRegionEntryKind Flags) {
2941   // If we are emitting code for a target, the entry is already initialized,
2942   // only has to be registered.
2943   if (CGM.getLangOpts().OpenMPIsDevice) {
2944     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
2945       unsigned DiagID = CGM.getDiags().getCustomDiagID(
2946           DiagnosticsEngine::Error,
2947           "Unable to find target region on line '%0' in the device code.");
2948       CGM.getDiags().Report(DiagID) << LineNum;
2949       return;
2950     }
2951     auto &Entry =
2952         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
2953     assert(Entry.isValid() && "Entry not initialized!");
2954     Entry.setAddress(Addr);
2955     Entry.setID(ID);
2956     Entry.setFlags(Flags);
2957   } else {
2958     if (Flags ==
2959             OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
2960         hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
2961                                  /*IgnoreAddressId*/ true))
2962       return;
2963     assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
2964            "Target region entry already registered!");
2965     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
2966     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
2967     ++OffloadingEntriesNum;
2968   }
2969 }
2970 
2971 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
2972     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
2973     bool IgnoreAddressId) const {
2974   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
2975   if (PerDevice == OffloadEntriesTargetRegion.end())
2976     return false;
2977   auto PerFile = PerDevice->second.find(FileID);
2978   if (PerFile == PerDevice->second.end())
2979     return false;
2980   auto PerParentName = PerFile->second.find(ParentName);
2981   if (PerParentName == PerFile->second.end())
2982     return false;
2983   auto PerLine = PerParentName->second.find(LineNum);
2984   if (PerLine == PerParentName->second.end())
2985     return false;
2986   // Fail if this entry is already registered.
2987   if (!IgnoreAddressId &&
2988       (PerLine->second.getAddress() || PerLine->second.getID()))
2989     return false;
2990   return true;
2991 }
2992 
2993 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
2994     const OffloadTargetRegionEntryInfoActTy &Action) {
2995   // Scan all target region entries and perform the provided action.
2996   for (const auto &D : OffloadEntriesTargetRegion)
2997     for (const auto &F : D.second)
2998       for (const auto &P : F.second)
2999         for (const auto &L : P.second)
3000           Action(D.first, F.first, P.first(), L.first, L.second);
3001 }
3002 
3003 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3004     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3005                                        OMPTargetGlobalVarEntryKind Flags,
3006                                        unsigned Order) {
3007   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3008                                              "only required for the device "
3009                                              "code generation.");
3010   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3011   ++OffloadingEntriesNum;
3012 }
3013 
3014 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3015     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3016                                      CharUnits VarSize,
3017                                      OMPTargetGlobalVarEntryKind Flags,
3018                                      llvm::GlobalValue::LinkageTypes Linkage) {
3019   if (CGM.getLangOpts().OpenMPIsDevice) {
3020     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3021     assert(Entry.isValid() && Entry.getFlags() == Flags &&
3022            "Entry not initialized!");
3023     assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3024            "Resetting with the new address.");
3025     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3026       if (Entry.getVarSize().isZero()) {
3027         Entry.setVarSize(VarSize);
3028         Entry.setLinkage(Linkage);
3029       }
3030       return;
3031     }
3032     Entry.setVarSize(VarSize);
3033     Entry.setLinkage(Linkage);
3034     Entry.setAddress(Addr);
3035   } else {
3036     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3037       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3038       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3039              "Entry not initialized!");
3040       assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3041              "Resetting with the new address.");
3042       if (Entry.getVarSize().isZero()) {
3043         Entry.setVarSize(VarSize);
3044         Entry.setLinkage(Linkage);
3045       }
3046       return;
3047     }
3048     OffloadEntriesDeviceGlobalVar.try_emplace(
3049         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3050     ++OffloadingEntriesNum;
3051   }
3052 }
3053 
3054 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3055     actOnDeviceGlobalVarEntriesInfo(
3056         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3057   // Scan all target region entries and perform the provided action.
3058   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3059     Action(E.getKey(), E.getValue());
3060 }
3061 
3062 void CGOpenMPRuntime::createOffloadEntry(
3063     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3064     llvm::GlobalValue::LinkageTypes Linkage) {
3065   StringRef Name = Addr->getName();
3066   llvm::Module &M = CGM.getModule();
3067   llvm::LLVMContext &C = M.getContext();
3068 
3069   // Create constant string with the name.
3070   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3071 
3072   std::string StringName = getName({"omp_offloading", "entry_name"});
3073   auto *Str = new llvm::GlobalVariable(
3074       M, StrPtrInit->getType(), /*isConstant=*/true,
3075       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3076   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3077 
3078   llvm::Constant *Data[] = {
3079       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3080       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3081       llvm::ConstantInt::get(CGM.SizeTy, Size),
3082       llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3083       llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3084   std::string EntryName = getName({"omp_offloading", "entry", ""});
3085   llvm::GlobalVariable *Entry = createGlobalStruct(
3086       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3087       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3088 
3089   // The entry has to be created in the section the linker expects it to be.
3090   Entry->setSection("omp_offloading_entries");
3091 }
3092 
3093 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3094   // Emit the offloading entries and metadata so that the device codegen side
3095   // can easily figure out what to emit. The produced metadata looks like
3096   // this:
3097   //
3098   // !omp_offload.info = !{!1, ...}
3099   //
3100   // Right now we only generate metadata for function that contain target
3101   // regions.
3102 
3103   // If we are in simd mode or there are no entries, we don't need to do
3104   // anything.
3105   if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
3106     return;
3107 
3108   llvm::Module &M = CGM.getModule();
3109   llvm::LLVMContext &C = M.getContext();
3110   SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
3111                          SourceLocation, StringRef>,
3112               16>
3113       OrderedEntries(OffloadEntriesInfoManager.size());
3114   llvm::SmallVector<StringRef, 16> ParentFunctions(
3115       OffloadEntriesInfoManager.size());
3116 
3117   // Auxiliary methods to create metadata values and strings.
3118   auto &&GetMDInt = [this](unsigned V) {
3119     return llvm::ConstantAsMetadata::get(
3120         llvm::ConstantInt::get(CGM.Int32Ty, V));
3121   };
3122 
3123   auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3124 
3125   // Create the offloading info metadata node.
3126   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3127 
3128   // Create function that emits metadata for each target region entry;
3129   auto &&TargetRegionMetadataEmitter =
3130       [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
3131        &GetMDString](
3132           unsigned DeviceID, unsigned FileID, StringRef ParentName,
3133           unsigned Line,
3134           const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
3135         // Generate metadata for target regions. Each entry of this metadata
3136         // contains:
3137         // - Entry 0 -> Kind of this type of metadata (0).
3138         // - Entry 1 -> Device ID of the file where the entry was identified.
3139         // - Entry 2 -> File ID of the file where the entry was identified.
3140         // - Entry 3 -> Mangled name of the function where the entry was
3141         // identified.
3142         // - Entry 4 -> Line in the file where the entry was identified.
3143         // - Entry 5 -> Order the entry was created.
3144         // The first element of the metadata node is the kind.
3145         llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
3146                                  GetMDInt(FileID),      GetMDString(ParentName),
3147                                  GetMDInt(Line),        GetMDInt(E.getOrder())};
3148 
3149         SourceLocation Loc;
3150         for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
3151                   E = CGM.getContext().getSourceManager().fileinfo_end();
3152              I != E; ++I) {
3153           if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
3154               I->getFirst()->getUniqueID().getFile() == FileID) {
3155             Loc = CGM.getContext().getSourceManager().translateFileLineCol(
3156                 I->getFirst(), Line, 1);
3157             break;
3158           }
3159         }
3160         // Save this entry in the right position of the ordered entries array.
3161         OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
3162         ParentFunctions[E.getOrder()] = ParentName;
3163 
3164         // Add metadata to the named metadata node.
3165         MD->addOperand(llvm::MDNode::get(C, Ops));
3166       };
3167 
3168   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3169       TargetRegionMetadataEmitter);
3170 
3171   // Create function that emits metadata for each device global variable entry;
3172   auto &&DeviceGlobalVarMetadataEmitter =
3173       [&C, &OrderedEntries, &GetMDInt, &GetMDString,
3174        MD](StringRef MangledName,
3175            const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
3176                &E) {
3177         // Generate metadata for global variables. Each entry of this metadata
3178         // contains:
3179         // - Entry 0 -> Kind of this type of metadata (1).
3180         // - Entry 1 -> Mangled name of the variable.
3181         // - Entry 2 -> Declare target kind.
3182         // - Entry 3 -> Order the entry was created.
3183         // The first element of the metadata node is the kind.
3184         llvm::Metadata *Ops[] = {
3185             GetMDInt(E.getKind()), GetMDString(MangledName),
3186             GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3187 
3188         // Save this entry in the right position of the ordered entries array.
3189         OrderedEntries[E.getOrder()] =
3190             std::make_tuple(&E, SourceLocation(), MangledName);
3191 
3192         // Add metadata to the named metadata node.
3193         MD->addOperand(llvm::MDNode::get(C, Ops));
3194       };
3195 
3196   OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
3197       DeviceGlobalVarMetadataEmitter);
3198 
3199   for (const auto &E : OrderedEntries) {
3200     assert(std::get<0>(E) && "All ordered entries must exist!");
3201     if (const auto *CE =
3202             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3203                 std::get<0>(E))) {
3204       if (!CE->getID() || !CE->getAddress()) {
3205         // Do not blame the entry if the parent funtion is not emitted.
3206         StringRef FnName = ParentFunctions[CE->getOrder()];
3207         if (!CGM.GetGlobalValue(FnName))
3208           continue;
3209         unsigned DiagID = CGM.getDiags().getCustomDiagID(
3210             DiagnosticsEngine::Error,
3211             "Offloading entry for target region in %0 is incorrect: either the "
3212             "address or the ID is invalid.");
3213         CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
3214         continue;
3215       }
3216       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3217                          CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3218     } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
3219                                              OffloadEntryInfoDeviceGlobalVar>(
3220                    std::get<0>(E))) {
3221       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
3222           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3223               CE->getFlags());
3224       switch (Flags) {
3225       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
3226         if (CGM.getLangOpts().OpenMPIsDevice &&
3227             CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
3228           continue;
3229         if (!CE->getAddress()) {
3230           unsigned DiagID = CGM.getDiags().getCustomDiagID(
3231               DiagnosticsEngine::Error, "Offloading entry for declare target "
3232                                         "variable %0 is incorrect: the "
3233                                         "address is invalid.");
3234           CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
3235           continue;
3236         }
3237         // The vaiable has no definition - no need to add the entry.
3238         if (CE->getVarSize().isZero())
3239           continue;
3240         break;
3241       }
3242       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
3243         assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
3244                 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
3245                "Declaret target link address is set.");
3246         if (CGM.getLangOpts().OpenMPIsDevice)
3247           continue;
3248         if (!CE->getAddress()) {
3249           unsigned DiagID = CGM.getDiags().getCustomDiagID(
3250               DiagnosticsEngine::Error,
3251               "Offloading entry for declare target variable is incorrect: the "
3252               "address is invalid.");
3253           CGM.getDiags().Report(DiagID);
3254           continue;
3255         }
3256         break;
3257       }
3258       createOffloadEntry(CE->getAddress(), CE->getAddress(),
3259                          CE->getVarSize().getQuantity(), Flags,
3260                          CE->getLinkage());
3261     } else {
3262       llvm_unreachable("Unsupported entry kind.");
3263     }
3264   }
3265 }
3266 
3267 /// Loads all the offload entries information from the host IR
3268 /// metadata.
3269 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3270   // If we are in target mode, load the metadata from the host IR. This code has
3271   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
3272 
3273   if (!CGM.getLangOpts().OpenMPIsDevice)
3274     return;
3275 
3276   if (CGM.getLangOpts().OMPHostIRFile.empty())
3277     return;
3278 
3279   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3280   if (auto EC = Buf.getError()) {
3281     CGM.getDiags().Report(diag::err_cannot_open_file)
3282         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3283     return;
3284   }
3285 
3286   llvm::LLVMContext C;
3287   auto ME = expectedToErrorOrAndEmitErrors(
3288       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3289 
3290   if (auto EC = ME.getError()) {
3291     unsigned DiagID = CGM.getDiags().getCustomDiagID(
3292         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3293     CGM.getDiags().Report(DiagID)
3294         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3295     return;
3296   }
3297 
3298   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3299   if (!MD)
3300     return;
3301 
3302   for (llvm::MDNode *MN : MD->operands()) {
3303     auto &&GetMDInt = [MN](unsigned Idx) {
3304       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3305       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3306     };
3307 
3308     auto &&GetMDString = [MN](unsigned Idx) {
3309       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3310       return V->getString();
3311     };
3312 
3313     switch (GetMDInt(0)) {
3314     default:
3315       llvm_unreachable("Unexpected metadata!");
3316       break;
3317     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3318         OffloadingEntryInfoTargetRegion:
3319       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3320           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3321           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3322           /*Order=*/GetMDInt(5));
3323       break;
3324     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3325         OffloadingEntryInfoDeviceGlobalVar:
3326       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3327           /*MangledName=*/GetMDString(1),
3328           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3329               /*Flags=*/GetMDInt(2)),
3330           /*Order=*/GetMDInt(3));
3331       break;
3332     }
3333   }
3334 }
3335 
3336 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3337   if (!KmpRoutineEntryPtrTy) {
3338     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3339     ASTContext &C = CGM.getContext();
3340     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3341     FunctionProtoType::ExtProtoInfo EPI;
3342     KmpRoutineEntryPtrQTy = C.getPointerType(
3343         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3344     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3345   }
3346 }
3347 
3348 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3349   // Make sure the type of the entry is already created. This is the type we
3350   // have to create:
3351   // struct __tgt_offload_entry{
3352   //   void      *addr;       // Pointer to the offload entry info.
3353   //                          // (function or global)
3354   //   char      *name;       // Name of the function or global.
3355   //   size_t     size;       // Size of the entry info (0 if it a function).
3356   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3357   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3358   // };
3359   if (TgtOffloadEntryQTy.isNull()) {
3360     ASTContext &C = CGM.getContext();
3361     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3362     RD->startDefinition();
3363     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3364     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3365     addFieldToRecordDecl(C, RD, C.getSizeType());
3366     addFieldToRecordDecl(
3367         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3368     addFieldToRecordDecl(
3369         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3370     RD->completeDefinition();
3371     RD->addAttr(PackedAttr::CreateImplicit(C));
3372     TgtOffloadEntryQTy = C.getRecordType(RD);
3373   }
3374   return TgtOffloadEntryQTy;
3375 }
3376 
3377 namespace {
3378 struct PrivateHelpersTy {
3379   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3380                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3381       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3382         PrivateElemInit(PrivateElemInit) {}
3383   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3384   const Expr *OriginalRef = nullptr;
3385   const VarDecl *Original = nullptr;
3386   const VarDecl *PrivateCopy = nullptr;
3387   const VarDecl *PrivateElemInit = nullptr;
3388   bool isLocalPrivate() const {
3389     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3390   }
3391 };
3392 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3393 } // anonymous namespace
3394 
3395 static bool isAllocatableDecl(const VarDecl *VD) {
3396   const VarDecl *CVD = VD->getCanonicalDecl();
3397   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3398     return false;
3399   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3400   // Use the default allocation.
3401   return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
3402             AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
3403            !AA->getAllocator());
3404 }
3405 
3406 static RecordDecl *
3407 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3408   if (!Privates.empty()) {
3409     ASTContext &C = CGM.getContext();
3410     // Build struct .kmp_privates_t. {
3411     //         /*  private vars  */
3412     //       };
3413     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3414     RD->startDefinition();
3415     for (const auto &Pair : Privates) {
3416       const VarDecl *VD = Pair.second.Original;
3417       QualType Type = VD->getType().getNonReferenceType();
3418       // If the private variable is a local variable with lvalue ref type,
3419       // allocate the pointer instead of the pointee type.
3420       if (Pair.second.isLocalPrivate()) {
3421         if (VD->getType()->isLValueReferenceType())
3422           Type = C.getPointerType(Type);
3423         if (isAllocatableDecl(VD))
3424           Type = C.getPointerType(Type);
3425       }
3426       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3427       if (VD->hasAttrs()) {
3428         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3429              E(VD->getAttrs().end());
3430              I != E; ++I)
3431           FD->addAttr(*I);
3432       }
3433     }
3434     RD->completeDefinition();
3435     return RD;
3436   }
3437   return nullptr;
3438 }
3439 
3440 static RecordDecl *
3441 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3442                          QualType KmpInt32Ty,
3443                          QualType KmpRoutineEntryPointerQTy) {
3444   ASTContext &C = CGM.getContext();
3445   // Build struct kmp_task_t {
3446   //         void *              shareds;
3447   //         kmp_routine_entry_t routine;
3448   //         kmp_int32           part_id;
3449   //         kmp_cmplrdata_t data1;
3450   //         kmp_cmplrdata_t data2;
3451   // For taskloops additional fields:
3452   //         kmp_uint64          lb;
3453   //         kmp_uint64          ub;
3454   //         kmp_int64           st;
3455   //         kmp_int32           liter;
3456   //         void *              reductions;
3457   //       };
3458   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3459   UD->startDefinition();
3460   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3461   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3462   UD->completeDefinition();
3463   QualType KmpCmplrdataTy = C.getRecordType(UD);
3464   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3465   RD->startDefinition();
3466   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3467   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3468   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3469   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3470   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3471   if (isOpenMPTaskLoopDirective(Kind)) {
3472     QualType KmpUInt64Ty =
3473         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3474     QualType KmpInt64Ty =
3475         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3476     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3477     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3478     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3479     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3480     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3481   }
3482   RD->completeDefinition();
3483   return RD;
3484 }
3485 
3486 static RecordDecl *
3487 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3488                                      ArrayRef<PrivateDataTy> Privates) {
3489   ASTContext &C = CGM.getContext();
3490   // Build struct kmp_task_t_with_privates {
3491   //         kmp_task_t task_data;
3492   //         .kmp_privates_t. privates;
3493   //       };
3494   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3495   RD->startDefinition();
3496   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3497   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3498     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3499   RD->completeDefinition();
3500   return RD;
3501 }
3502 
3503 /// Emit a proxy function which accepts kmp_task_t as the second
3504 /// argument.
3505 /// \code
3506 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3507 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3508 ///   For taskloops:
3509 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3510 ///   tt->reductions, tt->shareds);
3511 ///   return 0;
3512 /// }
3513 /// \endcode
3514 static llvm::Function *
3515 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3516                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3517                       QualType KmpTaskTWithPrivatesPtrQTy,
3518                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3519                       QualType SharedsPtrTy, llvm::Function *TaskFunction,
3520                       llvm::Value *TaskPrivatesMap) {
3521   ASTContext &C = CGM.getContext();
3522   FunctionArgList Args;
3523   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3524                             ImplicitParamDecl::Other);
3525   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3526                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3527                                 ImplicitParamDecl::Other);
3528   Args.push_back(&GtidArg);
3529   Args.push_back(&TaskTypeArg);
3530   const auto &TaskEntryFnInfo =
3531       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3532   llvm::FunctionType *TaskEntryTy =
3533       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3534   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3535   auto *TaskEntry = llvm::Function::Create(
3536       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3537   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3538   TaskEntry->setDoesNotRecurse();
3539   CodeGenFunction CGF(CGM);
3540   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3541                     Loc, Loc);
3542 
3543   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3544   // tt,
3545   // For taskloops:
3546   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3547   // tt->task_data.shareds);
3548   llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3549       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3550   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3551       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3552       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3553   const auto *KmpTaskTWithPrivatesQTyRD =
3554       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3555   LValue Base =
3556       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3557   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3558   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3559   LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3560   llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3561 
3562   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3563   LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3564   llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3565       CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3566       CGF.ConvertTypeForMem(SharedsPtrTy));
3567 
3568   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3569   llvm::Value *PrivatesParam;
3570   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3571     LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3572     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3573         PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3574   } else {
3575     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3576   }
3577 
3578   llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
3579                                TaskPrivatesMap,
3580                                CGF.Builder
3581                                    .CreatePointerBitCastOrAddrSpaceCast(
3582                                        TDBase.getAddress(CGF), CGF.VoidPtrTy)
3583                                    .getPointer()};
3584   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3585                                           std::end(CommonArgs));
3586   if (isOpenMPTaskLoopDirective(Kind)) {
3587     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3588     LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3589     llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3590     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3591     LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3592     llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3593     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3594     LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3595     llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3596     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3597     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3598     llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3599     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3600     LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3601     llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3602     CallArgs.push_back(LBParam);
3603     CallArgs.push_back(UBParam);
3604     CallArgs.push_back(StParam);
3605     CallArgs.push_back(LIParam);
3606     CallArgs.push_back(RParam);
3607   }
3608   CallArgs.push_back(SharedsParam);
3609 
3610   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3611                                                   CallArgs);
3612   CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3613                              CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3614   CGF.FinishFunction();
3615   return TaskEntry;
3616 }
3617 
3618 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3619                                             SourceLocation Loc,
3620                                             QualType KmpInt32Ty,
3621                                             QualType KmpTaskTWithPrivatesPtrQTy,
3622                                             QualType KmpTaskTWithPrivatesQTy) {
3623   ASTContext &C = CGM.getContext();
3624   FunctionArgList Args;
3625   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3626                             ImplicitParamDecl::Other);
3627   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3628                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3629                                 ImplicitParamDecl::Other);
3630   Args.push_back(&GtidArg);
3631   Args.push_back(&TaskTypeArg);
3632   const auto &DestructorFnInfo =
3633       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3634   llvm::FunctionType *DestructorFnTy =
3635       CGM.getTypes().GetFunctionType(DestructorFnInfo);
3636   std::string Name =
3637       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3638   auto *DestructorFn =
3639       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3640                              Name, &CGM.getModule());
3641   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3642                                     DestructorFnInfo);
3643   DestructorFn->setDoesNotRecurse();
3644   CodeGenFunction CGF(CGM);
3645   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3646                     Args, Loc, Loc);
3647 
3648   LValue Base = CGF.EmitLoadOfPointerLValue(
3649       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3650       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3651   const auto *KmpTaskTWithPrivatesQTyRD =
3652       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3653   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3654   Base = CGF.EmitLValueForField(Base, *FI);
3655   for (const auto *Field :
3656        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3657     if (QualType::DestructionKind DtorKind =
3658             Field->getType().isDestructedType()) {
3659       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3660       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3661     }
3662   }
3663   CGF.FinishFunction();
3664   return DestructorFn;
3665 }
3666 
3667 /// Emit a privates mapping function for correct handling of private and
3668 /// firstprivate variables.
3669 /// \code
3670 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3671 /// **noalias priv1,...,  <tyn> **noalias privn) {
3672 ///   *priv1 = &.privates.priv1;
3673 ///   ...;
3674 ///   *privn = &.privates.privn;
3675 /// }
3676 /// \endcode
3677 static llvm::Value *
3678 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3679                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3680                                ArrayRef<PrivateDataTy> Privates) {
3681   ASTContext &C = CGM.getContext();
3682   FunctionArgList Args;
3683   ImplicitParamDecl TaskPrivatesArg(
3684       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3685       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3686       ImplicitParamDecl::Other);
3687   Args.push_back(&TaskPrivatesArg);
3688   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3689   unsigned Counter = 1;
3690   for (const Expr *E : Data.PrivateVars) {
3691     Args.push_back(ImplicitParamDecl::Create(
3692         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3693         C.getPointerType(C.getPointerType(E->getType()))
3694             .withConst()
3695             .withRestrict(),
3696         ImplicitParamDecl::Other));
3697     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3698     PrivateVarsPos[VD] = Counter;
3699     ++Counter;
3700   }
3701   for (const Expr *E : Data.FirstprivateVars) {
3702     Args.push_back(ImplicitParamDecl::Create(
3703         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3704         C.getPointerType(C.getPointerType(E->getType()))
3705             .withConst()
3706             .withRestrict(),
3707         ImplicitParamDecl::Other));
3708     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3709     PrivateVarsPos[VD] = Counter;
3710     ++Counter;
3711   }
3712   for (const Expr *E : Data.LastprivateVars) {
3713     Args.push_back(ImplicitParamDecl::Create(
3714         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3715         C.getPointerType(C.getPointerType(E->getType()))
3716             .withConst()
3717             .withRestrict(),
3718         ImplicitParamDecl::Other));
3719     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3720     PrivateVarsPos[VD] = Counter;
3721     ++Counter;
3722   }
3723   for (const VarDecl *VD : Data.PrivateLocals) {
3724     QualType Ty = VD->getType().getNonReferenceType();
3725     if (VD->getType()->isLValueReferenceType())
3726       Ty = C.getPointerType(Ty);
3727     if (isAllocatableDecl(VD))
3728       Ty = C.getPointerType(Ty);
3729     Args.push_back(ImplicitParamDecl::Create(
3730         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3731         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3732         ImplicitParamDecl::Other));
3733     PrivateVarsPos[VD] = Counter;
3734     ++Counter;
3735   }
3736   const auto &TaskPrivatesMapFnInfo =
3737       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3738   llvm::FunctionType *TaskPrivatesMapTy =
3739       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3740   std::string Name =
3741       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3742   auto *TaskPrivatesMap = llvm::Function::Create(
3743       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3744       &CGM.getModule());
3745   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3746                                     TaskPrivatesMapFnInfo);
3747   if (CGM.getLangOpts().Optimize) {
3748     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3749     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3750     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3751   }
3752   CodeGenFunction CGF(CGM);
3753   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3754                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3755 
3756   // *privi = &.privates.privi;
3757   LValue Base = CGF.EmitLoadOfPointerLValue(
3758       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3759       TaskPrivatesArg.getType()->castAs<PointerType>());
3760   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3761   Counter = 0;
3762   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3763     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3764     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3765     LValue RefLVal =
3766         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3767     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3768         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3769     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3770     ++Counter;
3771   }
3772   CGF.FinishFunction();
3773   return TaskPrivatesMap;
3774 }
3775 
3776 /// Emit initialization for private variables in task-based directives.
3777 static void emitPrivatesInit(CodeGenFunction &CGF,
3778                              const OMPExecutableDirective &D,
3779                              Address KmpTaskSharedsPtr, LValue TDBase,
3780                              const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3781                              QualType SharedsTy, QualType SharedsPtrTy,
3782                              const OMPTaskDataTy &Data,
3783                              ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3784   ASTContext &C = CGF.getContext();
3785   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3786   LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3787   OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3788                                  ? OMPD_taskloop
3789                                  : OMPD_task;
3790   const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3791   CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3792   LValue SrcBase;
3793   bool IsTargetTask =
3794       isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3795       isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3796   // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3797   // PointersArray, SizesArray, and MappersArray. The original variables for
3798   // these arrays are not captured and we get their addresses explicitly.
3799   if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3800       (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3801     SrcBase = CGF.MakeAddrLValue(
3802         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3803             KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
3804         SharedsTy);
3805   }
3806   FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3807   for (const PrivateDataTy &Pair : Privates) {
3808     // Do not initialize private locals.
3809     if (Pair.second.isLocalPrivate()) {
3810       ++FI;
3811       continue;
3812     }
3813     const VarDecl *VD = Pair.second.PrivateCopy;
3814     const Expr *Init = VD->getAnyInitializer();
3815     if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3816                              !CGF.isTrivialInitializer(Init)))) {
3817       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3818       if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3819         const VarDecl *OriginalVD = Pair.second.Original;
3820         // Check if the variable is the target-based BasePointersArray,
3821         // PointersArray, SizesArray, or MappersArray.
3822         LValue SharedRefLValue;
3823         QualType Type = PrivateLValue.getType();
3824         const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3825         if (IsTargetTask && !SharedField) {
3826           assert(isa<ImplicitParamDecl>(OriginalVD) &&
3827                  isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3828                  cast<CapturedDecl>(OriginalVD->getDeclContext())
3829                          ->getNumParams() == 0 &&
3830                  isa<TranslationUnitDecl>(
3831                      cast<CapturedDecl>(OriginalVD->getDeclContext())
3832                          ->getDeclContext()) &&
3833                  "Expected artificial target data variable.");
3834           SharedRefLValue =
3835               CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3836         } else if (ForDup) {
3837           SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3838           SharedRefLValue = CGF.MakeAddrLValue(
3839               Address(SharedRefLValue.getPointer(CGF),
3840                       C.getDeclAlign(OriginalVD)),
3841               SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3842               SharedRefLValue.getTBAAInfo());
3843         } else if (CGF.LambdaCaptureFields.count(
3844                        Pair.second.Original->getCanonicalDecl()) > 0 ||
3845                    dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
3846           SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3847         } else {
3848           // Processing for implicitly captured variables.
3849           InlinedOpenMPRegionRAII Region(
3850               CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3851               /*HasCancel=*/false);
3852           SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3853         }
3854         if (Type->isArrayType()) {
3855           // Initialize firstprivate array.
3856           if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3857             // Perform simple memcpy.
3858             CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3859           } else {
3860             // Initialize firstprivate array using element-by-element
3861             // initialization.
3862             CGF.EmitOMPAggregateAssign(
3863                 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
3864                 Type,
3865                 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3866                                                   Address SrcElement) {
3867                   // Clean up any temporaries needed by the initialization.
3868                   CodeGenFunction::OMPPrivateScope InitScope(CGF);
3869                   InitScope.addPrivate(
3870                       Elem, [SrcElement]() -> Address { return SrcElement; });
3871                   (void)InitScope.Privatize();
3872                   // Emit initialization for single element.
3873                   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3874                       CGF, &CapturesInfo);
3875                   CGF.EmitAnyExprToMem(Init, DestElement,
3876                                        Init->getType().getQualifiers(),
3877                                        /*IsInitializer=*/false);
3878                 });
3879           }
3880         } else {
3881           CodeGenFunction::OMPPrivateScope InitScope(CGF);
3882           InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
3883             return SharedRefLValue.getAddress(CGF);
3884           });
3885           (void)InitScope.Privatize();
3886           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3887           CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3888                              /*capturedByInit=*/false);
3889         }
3890       } else {
3891         CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3892       }
3893     }
3894     ++FI;
3895   }
3896 }
3897 
3898 /// Check if duplication function is required for taskloops.
3899 static bool checkInitIsRequired(CodeGenFunction &CGF,
3900                                 ArrayRef<PrivateDataTy> Privates) {
3901   bool InitRequired = false;
3902   for (const PrivateDataTy &Pair : Privates) {
3903     if (Pair.second.isLocalPrivate())
3904       continue;
3905     const VarDecl *VD = Pair.second.PrivateCopy;
3906     const Expr *Init = VD->getAnyInitializer();
3907     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3908                                     !CGF.isTrivialInitializer(Init));
3909     if (InitRequired)
3910       break;
3911   }
3912   return InitRequired;
3913 }
3914 
3915 
3916 /// Emit task_dup function (for initialization of
3917 /// private/firstprivate/lastprivate vars and last_iter flag)
3918 /// \code
3919 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3920 /// lastpriv) {
3921 /// // setup lastprivate flag
3922 ///    task_dst->last = lastpriv;
3923 /// // could be constructor calls here...
3924 /// }
3925 /// \endcode
3926 static llvm::Value *
3927 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3928                     const OMPExecutableDirective &D,
3929                     QualType KmpTaskTWithPrivatesPtrQTy,
3930                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3931                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3932                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3933                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3934   ASTContext &C = CGM.getContext();
3935   FunctionArgList Args;
3936   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3937                            KmpTaskTWithPrivatesPtrQTy,
3938                            ImplicitParamDecl::Other);
3939   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3940                            KmpTaskTWithPrivatesPtrQTy,
3941                            ImplicitParamDecl::Other);
3942   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3943                                 ImplicitParamDecl::Other);
3944   Args.push_back(&DstArg);
3945   Args.push_back(&SrcArg);
3946   Args.push_back(&LastprivArg);
3947   const auto &TaskDupFnInfo =
3948       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3949   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3950   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3951   auto *TaskDup = llvm::Function::Create(
3952       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3953   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3954   TaskDup->setDoesNotRecurse();
3955   CodeGenFunction CGF(CGM);
3956   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3957                     Loc);
3958 
3959   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3960       CGF.GetAddrOfLocalVar(&DstArg),
3961       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3962   // task_dst->liter = lastpriv;
3963   if (WithLastIter) {
3964     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3965     LValue Base = CGF.EmitLValueForField(
3966         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3967     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3968     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3969         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3970     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3971   }
3972 
3973   // Emit initial values for private copies (if any).
3974   assert(!Privates.empty());
3975   Address KmpTaskSharedsPtr = Address::invalid();
3976   if (!Data.FirstprivateVars.empty()) {
3977     LValue TDBase = CGF.EmitLoadOfPointerLValue(
3978         CGF.GetAddrOfLocalVar(&SrcArg),
3979         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3980     LValue Base = CGF.EmitLValueForField(
3981         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3982     KmpTaskSharedsPtr = Address(
3983         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3984                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
3985                                                   KmpTaskTShareds)),
3986                              Loc),
3987         CGM.getNaturalTypeAlignment(SharedsTy));
3988   }
3989   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3990                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3991   CGF.FinishFunction();
3992   return TaskDup;
3993 }
3994 
3995 /// Checks if destructor function is required to be generated.
3996 /// \return true if cleanups are required, false otherwise.
3997 static bool
3998 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3999                          ArrayRef<PrivateDataTy> Privates) {
4000   for (const PrivateDataTy &P : Privates) {
4001     if (P.second.isLocalPrivate())
4002       continue;
4003     QualType Ty = P.second.Original->getType().getNonReferenceType();
4004     if (Ty.isDestructedType())
4005       return true;
4006   }
4007   return false;
4008 }
4009 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// The constructor opens one loop per iterator of the expression (in
/// declaration order) and leaves the builder positioned in the body of the
/// last opened loop; the destructor closes the loops in reverse order. Code
/// emitted while an object of this class is alive therefore ends up inside the
/// innermost loop body. If the iterator expression is null, neither the
/// constructor nor the destructor emits anything.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  /// Jump destinations of the "iter.cont" blocks, one per iterator.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  /// Jump destinations of the "iter.exit" blocks, one per iterator.
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// Emits the loop headers for all iterators of \p E (no-op if \p E is null).
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // Evaluate every upper bound up front and register private storage for
    // each iterator variable and its helper counter.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    // Open the loops, one per iterator; each next loop is nested in the body
    // of the previous one.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // The comparison signedness follows the type of the counter variable.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Closes the loops opened by the constructor, innermost first.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Walk the iterators in reverse so that the innermost loop is closed
    // first.
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4088 
4089 static std::pair<llvm::Value *, llvm::Value *>
4090 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4091   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4092   llvm::Value *Addr;
4093   if (OASE) {
4094     const Expr *Base = OASE->getBase();
4095     Addr = CGF.EmitScalarExpr(Base);
4096   } else {
4097     Addr = CGF.EmitLValue(E).getPointer(CGF);
4098   }
4099   llvm::Value *SizeVal;
4100   QualType Ty = E->getType();
4101   if (OASE) {
4102     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4103     for (const Expr *SE : OASE->getDimensions()) {
4104       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4105       Sz = CGF.EmitScalarConversion(
4106           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4107       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4108     }
4109   } else if (const auto *ASE =
4110                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4111     LValue UpAddrLVal =
4112         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4113     llvm::Value *UpAddr =
4114         CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
4115     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4116     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4117     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4118   } else {
4119     SizeVal = CGF.getTypeSize(Ty);
4120   }
4121   return std::make_pair(Addr, SizeVal);
4122 }
4123 
4124 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4125 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4126   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4127   if (KmpTaskAffinityInfoTy.isNull()) {
4128     RecordDecl *KmpAffinityInfoRD =
4129         C.buildImplicitRecord("kmp_task_affinity_info_t");
4130     KmpAffinityInfoRD->startDefinition();
4131     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4132     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4133     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4134     KmpAffinityInfoRD->completeDefinition();
4135     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4136   }
4137 }
4138 
/// Emits allocation and initialization of the task descriptor for the
/// task-generating directive \p D.
///
/// In order, this function: collects the private/firstprivate/lastprivate/
/// local-private variables from \p Data, builds the per-task record that holds
/// kmp_task_t plus the privates, emits the privates mapping function and the
/// proxy task entry, calls the runtime task allocation routine, handles the
/// 'detach' and 'affinity' clauses, copies the shareds into the new task,
/// initializes the private copies and, if needed, stores the destructors
/// function and the priority into the task descriptor.
/// \param Loc Location used for the emitted runtime calls and loads.
/// \param D Directive the task is created for (task, taskloop or target).
/// \param TaskFunction Outlined task function; the type of its 4th argument
/// is used as the type of the privates mapping function pointer.
/// \param SharedsTy Record type of the shareds.
/// \param Shareds Address of the shareds to copy into the task.
/// \param Data Additional data for the task (privates, flags, priority).
/// \return Structure with the new task pointer, the proxy entry, the task
/// pointer cast to the per-task record type, the lvalue of the embedded
/// kmp_task_t and its record declaration; TaskDupFn is set only when a task
/// duplication function is emitted.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Local privates: allocatable declarations are recorded with pointer
  // alignment, all others with the alignment of the declaration.
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Descending alignment; the stable sort keeps the relative order of
  // privates with equal alignment.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop directives and the other directives use separately cached
  // kmp_task_t record types.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates mapping function is passed as the 4th argument of the
  // outlined task function, so that argument's type is the target type.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    // The second field of the per-task record supplies the type handed to the
    // mapping function generator.
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The final flag is selected at run time when Data.Final carries a value
  // (pointer part); otherwise the integer part is folded into a constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // With a 'nowait' clause the target task allocation entry point is called;
  // it takes the device id as an additional trailing argument.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    // Note: this 'Loc' shadows the function parameter within this block and
    // refers to the location of the detach clause.
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // NumOfElements accumulates the run-time part (clauses with an iterator
    // modifier), NumAffinities the compile-time part (clauses without one).
    // NOTE(review): the upper bounds of all iterators of all clauses are
    // multiplied together and the size of the clause's variable list is not
    // factored in - confirm this matches the number of entries written by the
    // iterator loop below.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // At least one clause has an iterator: the element count is only known
      // at run time, so the array is emitted as a variable-length array.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // No iterators: the element count is a compile-time constant and a
      // fixed-size temporary is sufficient.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    // Only the base address and the length of each element are stored; the
    // flags field is not written here.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Elements produced under iterators are indexed through a run-time
    // counter that starts right after the statically placed elements.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      // The scope object emits the iterator loops around the code generated
      // until the end of this loop iteration.
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        // ++counter;
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  // View the allocated task through the per-task record type; its first field
  // is the embedded kmp_task_t.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    // The destination is the pointer loaded from the shareds field of the
    // freshly allocated task.
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // A task duplication function is emitted only for taskloop directives
    // that have lastprivates or privates requiring initialization.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4528 
namespace {
/// Dependence kinds as they are encoded in the flags field passed to the
/// runtime library.
enum RTLDependenceKindTy {
  /// 'in' dependence.
  DepIn = 0x1,
  /// 'out' and 'inout' dependences (they share one encoding).
  DepInOut = 0x3,
  /// 'mutexinoutset' dependence.
  DepMutexInOutSet = 0x4
};
/// Indices of the fields in the kmp_depend_info record.
enum RTLDependInfoFieldsTy {
  /// Base address of the dependence object.
  BaseAddr = 0,
  /// Length of the dependence object.
  Len = 1,
  /// Dependence kind flags.
  Flags = 2
};
} // namespace
4539 
4540 /// Translates internal dependency kind into the runtime kind.
4541 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4542   RTLDependenceKindTy DepKind;
4543   switch (K) {
4544   case OMPC_DEPEND_in:
4545     DepKind = DepIn;
4546     break;
4547   // Out and InOut dependencies must use the same code.
4548   case OMPC_DEPEND_out:
4549   case OMPC_DEPEND_inout:
4550     DepKind = DepInOut;
4551     break;
4552   case OMPC_DEPEND_mutexinoutset:
4553     DepKind = DepMutexInOutSet;
4554     break;
4555   case OMPC_DEPEND_source:
4556   case OMPC_DEPEND_sink:
4557   case OMPC_DEPEND_depobj:
4558   case OMPC_DEPEND_unknown:
4559     llvm_unreachable("Unknown task dependence type");
4560   }
4561   return DepKind;
4562 }
4563 
4564 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4565 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4566                            QualType &FlagsTy) {
4567   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4568   if (KmpDependInfoTy.isNull()) {
4569     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4570     KmpDependInfoRD->startDefinition();
4571     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4572     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4573     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4574     KmpDependInfoRD->completeDefinition();
4575     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4576   }
4577 }
4578 
4579 std::pair<llvm::Value *, LValue>
4580 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4581                                    SourceLocation Loc) {
4582   ASTContext &C = CGM.getContext();
4583   QualType FlagsTy;
4584   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4585   RecordDecl *KmpDependInfoRD =
4586       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4587   LValue Base = CGF.EmitLoadOfPointerLValue(
4588       DepobjLVal.getAddress(CGF),
4589       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4590   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4591   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4592           Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
4593   Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4594                             Base.getTBAAInfo());
4595   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4596       Addr.getPointer(),
4597       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4598   LValue NumDepsBase = CGF.MakeAddrLValue(
4599       Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4600       Base.getBaseInfo(), Base.getTBAAInfo());
4601   // NumDeps = deps[i].base_addr;
4602   LValue BaseAddrLVal = CGF.EmitLValueForField(
4603       NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4604   llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4605   return std::make_pair(NumDeps, Base);
4606 }
4607 
4608 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4609                            llvm::PointerUnion<unsigned *, LValue *> Pos,
4610                            const OMPTaskDataTy::DependData &Data,
4611                            Address DependenciesArray) {
4612   CodeGenModule &CGM = CGF.CGM;
4613   ASTContext &C = CGM.getContext();
4614   QualType FlagsTy;
4615   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4616   RecordDecl *KmpDependInfoRD =
4617       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4618   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4619 
4620   OMPIteratorGeneratorScope IteratorScope(
4621       CGF, cast_or_null<OMPIteratorExpr>(
4622                Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4623                                  : nullptr));
4624   for (const Expr *E : Data.DepExprs) {
4625     llvm::Value *Addr;
4626     llvm::Value *Size;
4627     std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4628     LValue Base;
4629     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4630       Base = CGF.MakeAddrLValue(
4631           CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4632     } else {
4633       LValue &PosLVal = *Pos.get<LValue *>();
4634       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4635       Base = CGF.MakeAddrLValue(
4636           Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
4637                   DependenciesArray.getAlignment()),
4638           KmpDependInfoTy);
4639     }
4640     // deps[i].base_addr = &<Dependencies[i].second>;
4641     LValue BaseAddrLVal = CGF.EmitLValueForField(
4642         Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4643     CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4644                           BaseAddrLVal);
4645     // deps[i].len = sizeof(<Dependencies[i].second>);
4646     LValue LenLVal = CGF.EmitLValueForField(
4647         Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4648     CGF.EmitStoreOfScalar(Size, LenLVal);
4649     // deps[i].flags = <Dependencies[i].first>;
4650     RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4651     LValue FlagsLVal = CGF.EmitLValueForField(
4652         Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4653     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4654                           FlagsLVal);
4655     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4656       ++(*P);
4657     } else {
4658       LValue &PosLVal = *Pos.get<LValue *>();
4659       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4660       Idx = CGF.Builder.CreateNUWAdd(Idx,
4661                                      llvm::ConstantInt::get(Idx->getType(), 1));
4662       CGF.EmitStoreOfScalar(Idx, PosLVal);
4663     }
4664   }
4665 }
4666 
4667 static SmallVector<llvm::Value *, 4>
4668 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4669                         const OMPTaskDataTy::DependData &Data) {
4670   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4671          "Expected depobj dependecy kind.");
4672   SmallVector<llvm::Value *, 4> Sizes;
4673   SmallVector<LValue, 4> SizeLVals;
4674   ASTContext &C = CGF.getContext();
4675   QualType FlagsTy;
4676   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4677   RecordDecl *KmpDependInfoRD =
4678       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4679   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4680   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4681   {
4682     OMPIteratorGeneratorScope IteratorScope(
4683         CGF, cast_or_null<OMPIteratorExpr>(
4684                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4685                                    : nullptr));
4686     for (const Expr *E : Data.DepExprs) {
4687       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4688       LValue Base = CGF.EmitLoadOfPointerLValue(
4689           DepobjLVal.getAddress(CGF),
4690           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4691       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4692           Base.getAddress(CGF), KmpDependInfoPtrT);
4693       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4694                                 Base.getTBAAInfo());
4695       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4696           Addr.getPointer(),
4697           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4698       LValue NumDepsBase = CGF.MakeAddrLValue(
4699           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4700           Base.getBaseInfo(), Base.getTBAAInfo());
4701       // NumDeps = deps[i].base_addr;
4702       LValue BaseAddrLVal = CGF.EmitLValueForField(
4703           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4704       llvm::Value *NumDeps =
4705           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4706       LValue NumLVal = CGF.MakeAddrLValue(
4707           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4708           C.getUIntPtrType());
4709       CGF.InitTempAlloca(NumLVal.getAddress(CGF),
4710                          llvm::ConstantInt::get(CGF.IntPtrTy, 0));
4711       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4712       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4713       CGF.EmitStoreOfScalar(Add, NumLVal);
4714       SizeLVals.push_back(NumLVal);
4715     }
4716   }
4717   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4718     llvm::Value *Size =
4719         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4720     Sizes.push_back(Size);
4721   }
4722   return Sizes;
4723 }
4724 
4725 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4726                                LValue PosLVal,
4727                                const OMPTaskDataTy::DependData &Data,
4728                                Address DependenciesArray) {
4729   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4730          "Expected depobj dependecy kind.");
4731   ASTContext &C = CGF.getContext();
4732   QualType FlagsTy;
4733   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4734   RecordDecl *KmpDependInfoRD =
4735       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4736   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4737   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4738   llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4739   {
4740     OMPIteratorGeneratorScope IteratorScope(
4741         CGF, cast_or_null<OMPIteratorExpr>(
4742                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4743                                    : nullptr));
4744     for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4745       const Expr *E = Data.DepExprs[I];
4746       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4747       LValue Base = CGF.EmitLoadOfPointerLValue(
4748           DepobjLVal.getAddress(CGF),
4749           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4750       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4751           Base.getAddress(CGF), KmpDependInfoPtrT);
4752       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4753                                 Base.getTBAAInfo());
4754 
4755       // Get number of elements in a single depobj.
4756       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4757           Addr.getPointer(),
4758           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4759       LValue NumDepsBase = CGF.MakeAddrLValue(
4760           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4761           Base.getBaseInfo(), Base.getTBAAInfo());
4762       // NumDeps = deps[i].base_addr;
4763       LValue BaseAddrLVal = CGF.EmitLValueForField(
4764           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4765       llvm::Value *NumDeps =
4766           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4767 
4768       // memcopy dependency data.
4769       llvm::Value *Size = CGF.Builder.CreateNUWMul(
4770           ElSize,
4771           CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4772       llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4773       Address DepAddr =
4774           Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
4775                   DependenciesArray.getAlignment());
4776       CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4777 
4778       // Increase pos.
4779       // pos += size;
4780       llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4781       CGF.EmitStoreOfScalar(Add, PosLVal);
4782     }
4783   }
4784 }
4785 
4786 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4787     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4788     SourceLocation Loc) {
4789   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4790         return D.DepExprs.empty();
4791       }))
4792     return std::make_pair(nullptr, Address::invalid());
4793   // Process list of dependencies.
4794   ASTContext &C = CGM.getContext();
4795   Address DependenciesArray = Address::invalid();
4796   llvm::Value *NumOfElements = nullptr;
4797   unsigned NumDependencies = std::accumulate(
4798       Dependencies.begin(), Dependencies.end(), 0,
4799       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4800         return D.DepKind == OMPC_DEPEND_depobj
4801                    ? V
4802                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4803       });
4804   QualType FlagsTy;
4805   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4806   bool HasDepobjDeps = false;
4807   bool HasRegularWithIterators = false;
4808   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4809   llvm::Value *NumOfRegularWithIterators =
4810       llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4811   // Calculate number of depobj dependecies and regular deps with the iterators.
4812   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4813     if (D.DepKind == OMPC_DEPEND_depobj) {
4814       SmallVector<llvm::Value *, 4> Sizes =
4815           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4816       for (llvm::Value *Size : Sizes) {
4817         NumOfDepobjElements =
4818             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4819       }
4820       HasDepobjDeps = true;
4821       continue;
4822     }
4823     // Include number of iterations, if any.
4824     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4825       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4826         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4827         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4828         NumOfRegularWithIterators =
4829             CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4830       }
4831       HasRegularWithIterators = true;
4832       continue;
4833     }
4834   }
4835 
4836   QualType KmpDependInfoArrayTy;
4837   if (HasDepobjDeps || HasRegularWithIterators) {
4838     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4839                                            /*isSigned=*/false);
4840     if (HasDepobjDeps) {
4841       NumOfElements =
4842           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4843     }
4844     if (HasRegularWithIterators) {
4845       NumOfElements =
4846           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4847     }
4848     OpaqueValueExpr OVE(Loc,
4849                         C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4850                         VK_RValue);
4851     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4852                                                   RValue::get(NumOfElements));
4853     KmpDependInfoArrayTy =
4854         C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
4855                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4856     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4857     // Properly emit variable-sized array.
4858     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4859                                          ImplicitParamDecl::Other);
4860     CGF.EmitVarDecl(*PD);
4861     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4862     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4863                                               /*isSigned=*/false);
4864   } else {
4865     KmpDependInfoArrayTy = C.getConstantArrayType(
4866         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4867         ArrayType::Normal, /*IndexTypeQuals=*/0);
4868     DependenciesArray =
4869         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4870     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4871     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4872                                            /*isSigned=*/false);
4873   }
4874   unsigned Pos = 0;
4875   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4876     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4877         Dependencies[I].IteratorExpr)
4878       continue;
4879     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4880                    DependenciesArray);
4881   }
4882   // Copy regular dependecies with iterators.
4883   LValue PosLVal = CGF.MakeAddrLValue(
4884       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4885   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4886   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4887     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4888         !Dependencies[I].IteratorExpr)
4889       continue;
4890     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4891                    DependenciesArray);
4892   }
4893   // Copy final depobj arrays without iterators.
4894   if (HasDepobjDeps) {
4895     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4896       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4897         continue;
4898       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4899                          DependenciesArray);
4900     }
4901   }
4902   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4903       DependenciesArray, CGF.VoidPtrTy);
4904   return std::make_pair(NumOfElements, DependenciesArray);
4905 }
4906 
4907 Address CGOpenMPRuntime::emitDepobjDependClause(
4908     CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4909     SourceLocation Loc) {
4910   if (Dependencies.DepExprs.empty())
4911     return Address::invalid();
4912   // Process list of dependencies.
4913   ASTContext &C = CGM.getContext();
4914   Address DependenciesArray = Address::invalid();
4915   unsigned NumDependencies = Dependencies.DepExprs.size();
4916   QualType FlagsTy;
4917   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4918   RecordDecl *KmpDependInfoRD =
4919       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4920 
4921   llvm::Value *Size;
4922   // Define type kmp_depend_info[<Dependencies.size()>];
4923   // For depobj reserve one extra element to store the number of elements.
4924   // It is required to handle depobj(x) update(in) construct.
4925   // kmp_depend_info[<Dependencies.size()>] deps;
4926   llvm::Value *NumDepsVal;
4927   CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4928   if (const auto *IE =
4929           cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4930     NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4931     for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4932       llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4933       Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4934       NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4935     }
4936     Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4937                                     NumDepsVal);
4938     CharUnits SizeInBytes =
4939         C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4940     llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4941     Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4942     NumDepsVal =
4943         CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4944   } else {
4945     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4946         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4947         nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
4948     CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4949     Size = CGM.getSize(Sz.alignTo(Align));
4950     NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4951   }
4952   // Need to allocate on the dynamic memory.
4953   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4954   // Use default allocator.
4955   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4956   llvm::Value *Args[] = {ThreadID, Size, Allocator};
4957 
4958   llvm::Value *Addr =
4959       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4960                               CGM.getModule(), OMPRTL___kmpc_alloc),
4961                           Args, ".dep.arr.addr");
4962   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4963       Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
4964   DependenciesArray = Address(Addr, Align);
4965   // Write number of elements in the first element of array for depobj.
4966   LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4967   // deps[i].base_addr = NumDependencies;
4968   LValue BaseAddrLVal = CGF.EmitLValueForField(
4969       Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4970   CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4971   llvm::PointerUnion<unsigned *, LValue *> Pos;
4972   unsigned Idx = 1;
4973   LValue PosLVal;
4974   if (Dependencies.IteratorExpr) {
4975     PosLVal = CGF.MakeAddrLValue(
4976         CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4977         C.getSizeType());
4978     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4979                           /*IsInit=*/true);
4980     Pos = &PosLVal;
4981   } else {
4982     Pos = &Idx;
4983   }
4984   emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4985   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4986       CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
4987   return DependenciesArray;
4988 }
4989 
4990 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4991                                         SourceLocation Loc) {
4992   ASTContext &C = CGM.getContext();
4993   QualType FlagsTy;
4994   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4995   LValue Base = CGF.EmitLoadOfPointerLValue(
4996       DepobjLVal.getAddress(CGF),
4997       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4998   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4999   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5000       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5001   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5002       Addr.getPointer(),
5003       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5004   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5005                                                                CGF.VoidPtrTy);
5006   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5007   // Use default allocator.
5008   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5009   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5010 
5011   // _kmpc_free(gtid, addr, nullptr);
5012   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5013                                 CGM.getModule(), OMPRTL___kmpc_free),
5014                             Args);
5015 }
5016 
5017 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
5018                                        OpenMPDependClauseKind NewDepKind,
5019                                        SourceLocation Loc) {
5020   ASTContext &C = CGM.getContext();
5021   QualType FlagsTy;
5022   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5023   RecordDecl *KmpDependInfoRD =
5024       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5025   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5026   llvm::Value *NumDeps;
5027   LValue Base;
5028   std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
5029 
5030   Address Begin = Base.getAddress(CGF);
5031   // Cast from pointer to array type to pointer to single element.
5032   llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
5033   // The basic structure here is a while-do loop.
5034   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
5035   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
5036   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5037   CGF.EmitBlock(BodyBB);
5038   llvm::PHINode *ElementPHI =
5039       CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
5040   ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
5041   Begin = Address(ElementPHI, Begin.getAlignment());
5042   Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
5043                             Base.getTBAAInfo());
5044   // deps[i].flags = NewDepKind;
5045   RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
5046   LValue FlagsLVal = CGF.EmitLValueForField(
5047       Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5048   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5049                         FlagsLVal);
5050 
5051   // Shift the address forward by one element.
5052   Address ElementNext =
5053       CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
5054   ElementPHI->addIncoming(ElementNext.getPointer(),
5055                           CGF.Builder.GetInsertBlock());
5056   llvm::Value *IsEmpty =
5057       CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
5058   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5059   // Done.
5060   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5061 }
5062 
5063 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5064                                    const OMPExecutableDirective &D,
5065                                    llvm::Function *TaskFunction,
5066                                    QualType SharedsTy, Address Shareds,
5067                                    const Expr *IfCond,
5068                                    const OMPTaskDataTy &Data) {
5069   if (!CGF.HaveInsertPoint())
5070     return;
5071 
5072   TaskResultTy Result =
5073       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5074   llvm::Value *NewTask = Result.NewTask;
5075   llvm::Function *TaskEntry = Result.TaskEntry;
5076   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5077   LValue TDBase = Result.TDBase;
5078   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5079   // Process list of dependences.
5080   Address DependenciesArray = Address::invalid();
5081   llvm::Value *NumOfElements;
5082   std::tie(NumOfElements, DependenciesArray) =
5083       emitDependClause(CGF, Data.Dependences, Loc);
5084 
5085   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5086   // libcall.
5087   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5088   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5089   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5090   // list is not empty
5091   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5092   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5093   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5094   llvm::Value *DepTaskArgs[7];
5095   if (!Data.Dependences.empty()) {
5096     DepTaskArgs[0] = UpLoc;
5097     DepTaskArgs[1] = ThreadID;
5098     DepTaskArgs[2] = NewTask;
5099     DepTaskArgs[3] = NumOfElements;
5100     DepTaskArgs[4] = DependenciesArray.getPointer();
5101     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5102     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5103   }
5104   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
5105                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5106     if (!Data.Tied) {
5107       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5108       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5109       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5110     }
5111     if (!Data.Dependences.empty()) {
5112       CGF.EmitRuntimeCall(
5113           OMPBuilder.getOrCreateRuntimeFunction(
5114               CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
5115           DepTaskArgs);
5116     } else {
5117       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5118                               CGM.getModule(), OMPRTL___kmpc_omp_task),
5119                           TaskArgs);
5120     }
5121     // Check if parent region is untied and build return for untied task;
5122     if (auto *Region =
5123             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5124       Region->emitUntiedSwitch(CGF);
5125   };
5126 
5127   llvm::Value *DepWaitTaskArgs[6];
5128   if (!Data.Dependences.empty()) {
5129     DepWaitTaskArgs[0] = UpLoc;
5130     DepWaitTaskArgs[1] = ThreadID;
5131     DepWaitTaskArgs[2] = NumOfElements;
5132     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5133     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5134     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5135   }
5136   auto &M = CGM.getModule();
5137   auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
5138                         TaskEntry, &Data, &DepWaitTaskArgs,
5139                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5140     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5141     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5142     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5143     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5144     // is specified.
5145     if (!Data.Dependences.empty())
5146       CGF.EmitRuntimeCall(
5147           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
5148           DepWaitTaskArgs);
5149     // Call proxy_task_entry(gtid, new_task);
5150     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5151                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5152       Action.Enter(CGF);
5153       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5154       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5155                                                           OutlinedFnArgs);
5156     };
5157 
5158     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5159     // kmp_task_t *new_task);
5160     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5161     // kmp_task_t *new_task);
5162     RegionCodeGenTy RCG(CodeGen);
5163     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
5164                               M, OMPRTL___kmpc_omp_task_begin_if0),
5165                           TaskArgs,
5166                           OMPBuilder.getOrCreateRuntimeFunction(
5167                               M, OMPRTL___kmpc_omp_task_complete_if0),
5168                           TaskArgs);
5169     RCG.setAction(Action);
5170     RCG(CGF);
5171   };
5172 
5173   if (IfCond) {
5174     emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5175   } else {
5176     RegionCodeGenTy ThenRCG(ThenCodeGen);
5177     ThenRCG(CGF);
5178   }
5179 }
5180 
5181 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5182                                        const OMPLoopDirective &D,
5183                                        llvm::Function *TaskFunction,
5184                                        QualType SharedsTy, Address Shareds,
5185                                        const Expr *IfCond,
5186                                        const OMPTaskDataTy &Data) {
5187   if (!CGF.HaveInsertPoint())
5188     return;
5189   TaskResultTy Result =
5190       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5191   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5192   // libcall.
5193   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5194   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5195   // sched, kmp_uint64 grainsize, void *task_dup);
5196   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5197   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5198   llvm::Value *IfVal;
5199   if (IfCond) {
5200     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5201                                       /*isSigned=*/true);
5202   } else {
5203     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5204   }
5205 
5206   LValue LBLVal = CGF.EmitLValueForField(
5207       Result.TDBase,
5208       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5209   const auto *LBVar =
5210       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5211   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5212                        LBLVal.getQuals(),
5213                        /*IsInitializer=*/true);
5214   LValue UBLVal = CGF.EmitLValueForField(
5215       Result.TDBase,
5216       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5217   const auto *UBVar =
5218       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5219   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5220                        UBLVal.getQuals(),
5221                        /*IsInitializer=*/true);
5222   LValue StLVal = CGF.EmitLValueForField(
5223       Result.TDBase,
5224       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5225   const auto *StVar =
5226       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5227   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5228                        StLVal.getQuals(),
5229                        /*IsInitializer=*/true);
5230   // Store reductions address.
5231   LValue RedLVal = CGF.EmitLValueForField(
5232       Result.TDBase,
5233       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5234   if (Data.Reductions) {
5235     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5236   } else {
5237     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5238                                CGF.getContext().VoidPtrTy);
5239   }
5240   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5241   llvm::Value *TaskArgs[] = {
5242       UpLoc,
5243       ThreadID,
5244       Result.NewTask,
5245       IfVal,
5246       LBLVal.getPointer(CGF),
5247       UBLVal.getPointer(CGF),
5248       CGF.EmitLoadOfScalar(StLVal, Loc),
5249       llvm::ConstantInt::getSigned(
5250           CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5251       llvm::ConstantInt::getSigned(
5252           CGF.IntTy, Data.Schedule.getPointer()
5253                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5254                          : NoSchedule),
5255       Data.Schedule.getPointer()
5256           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5257                                       /*isSigned=*/false)
5258           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5259       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5260                              Result.TaskDupFn, CGF.VoidPtrTy)
5261                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5262   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5263                           CGM.getModule(), OMPRTL___kmpc_taskloop),
5264                       TaskArgs);
5265 }
5266 
5267 /// Emit reduction operation for each element of array (required for
5268 /// array sections) LHS op = RHS.
5269 /// \param Type Type of array.
5270 /// \param LHSVar Variable on the left side of the reduction operation
5271 /// (references element of array in original variable).
5272 /// \param RHSVar Variable on the right side of the reduction operation
5273 /// (references element of array in original variable).
5274 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5275 /// RHSVar.
5276 static void EmitOMPAggregateReduction(
5277     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5278     const VarDecl *RHSVar,
5279     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5280                                   const Expr *, const Expr *)> &RedOpGen,
5281     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5282     const Expr *UpExpr = nullptr) {
5283   // Perform element-by-element initialization.
5284   QualType ElementTy;
5285   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5286   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5287 
5288   // Drill down to the base element type on both arrays.
5289   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5290   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5291 
5292   llvm::Value *RHSBegin = RHSAddr.getPointer();
5293   llvm::Value *LHSBegin = LHSAddr.getPointer();
5294   // Cast from pointer to array type to pointer to single element.
5295   llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
5296   // The basic structure here is a while-do loop.
5297   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5298   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5299   llvm::Value *IsEmpty =
5300       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5301   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5302 
5303   // Enter the loop body, making that address the current address.
5304   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5305   CGF.EmitBlock(BodyBB);
5306 
5307   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5308 
5309   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5310       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5311   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5312   Address RHSElementCurrent =
5313       Address(RHSElementPHI,
5314               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5315 
5316   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5317       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5318   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5319   Address LHSElementCurrent =
5320       Address(LHSElementPHI,
5321               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5322 
5323   // Emit copy.
5324   CodeGenFunction::OMPPrivateScope Scope(CGF);
5325   Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5326   Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5327   Scope.Privatize();
5328   RedOpGen(CGF, XExpr, EExpr, UpExpr);
5329   Scope.ForceCleanup();
5330 
5331   // Shift the address forward by one element.
5332   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5333       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
5334   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5335       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
5336   // Check whether we've reached the end.
5337   llvm::Value *Done =
5338       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5339   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5340   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5341   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5342 
5343   // Done.
5344   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5345 }
5346 
5347 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5348 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5349 /// UDR combiner function.
5350 static void emitReductionCombiner(CodeGenFunction &CGF,
5351                                   const Expr *ReductionOp) {
5352   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5353     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5354       if (const auto *DRE =
5355               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5356         if (const auto *DRD =
5357                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5358           std::pair<llvm::Function *, llvm::Function *> Reduction =
5359               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5360           RValue Func = RValue::get(Reduction.first);
5361           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5362           CGF.EmitIgnoredExpr(ReductionOp);
5363           return;
5364         }
5365   CGF.EmitIgnoredExpr(ReductionOp);
5366 }
5367 
5368 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5369     SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5370     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5371     ArrayRef<const Expr *> ReductionOps) {
5372   ASTContext &C = CGM.getContext();
5373 
5374   // void reduction_func(void *LHSArg, void *RHSArg);
5375   FunctionArgList Args;
5376   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5377                            ImplicitParamDecl::Other);
5378   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5379                            ImplicitParamDecl::Other);
5380   Args.push_back(&LHSArg);
5381   Args.push_back(&RHSArg);
5382   const auto &CGFI =
5383       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5384   std::string Name = getName({"omp", "reduction", "reduction_func"});
5385   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5386                                     llvm::GlobalValue::InternalLinkage, Name,
5387                                     &CGM.getModule());
5388   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5389   Fn->setDoesNotRecurse();
5390   CodeGenFunction CGF(CGM);
5391   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5392 
5393   // Dst = (void*[n])(LHSArg);
5394   // Src = (void*[n])(RHSArg);
5395   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5396       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5397       ArgsType), CGF.getPointerAlign());
5398   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5399       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5400       ArgsType), CGF.getPointerAlign());
5401 
5402   //  ...
5403   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5404   //  ...
5405   CodeGenFunction::OMPPrivateScope Scope(CGF);
5406   auto IPriv = Privates.begin();
5407   unsigned Idx = 0;
5408   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5409     const auto *RHSVar =
5410         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5411     Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5412       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5413     });
5414     const auto *LHSVar =
5415         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5416     Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5417       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5418     });
5419     QualType PrivTy = (*IPriv)->getType();
5420     if (PrivTy->isVariablyModifiedType()) {
5421       // Get array size and emit VLA type.
5422       ++Idx;
5423       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5424       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5425       const VariableArrayType *VLA =
5426           CGF.getContext().getAsVariableArrayType(PrivTy);
5427       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5428       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5429           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5430       CGF.EmitVariablyModifiedType(PrivTy);
5431     }
5432   }
5433   Scope.Privatize();
5434   IPriv = Privates.begin();
5435   auto ILHS = LHSExprs.begin();
5436   auto IRHS = RHSExprs.begin();
5437   for (const Expr *E : ReductionOps) {
5438     if ((*IPriv)->getType()->isArrayType()) {
5439       // Emit reduction for array section.
5440       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5441       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5442       EmitOMPAggregateReduction(
5443           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5444           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5445             emitReductionCombiner(CGF, E);
5446           });
5447     } else {
5448       // Emit reduction for array subscript or single variable.
5449       emitReductionCombiner(CGF, E);
5450     }
5451     ++IPriv;
5452     ++ILHS;
5453     ++IRHS;
5454   }
5455   Scope.ForceCleanup();
5456   CGF.FinishFunction();
5457   return Fn;
5458 }
5459 
5460 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5461                                                   const Expr *ReductionOp,
5462                                                   const Expr *PrivateRef,
5463                                                   const DeclRefExpr *LHS,
5464                                                   const DeclRefExpr *RHS) {
5465   if (PrivateRef->getType()->isArrayType()) {
5466     // Emit reduction for array section.
5467     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5468     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5469     EmitOMPAggregateReduction(
5470         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5471         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5472           emitReductionCombiner(CGF, ReductionOp);
5473         });
5474   } else {
5475     // Emit reduction for array subscript or single variable.
5476     emitReductionCombiner(CGF, ReductionOp);
5477   }
5478 }
5479 
5480 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5481                                     ArrayRef<const Expr *> Privates,
5482                                     ArrayRef<const Expr *> LHSExprs,
5483                                     ArrayRef<const Expr *> RHSExprs,
5484                                     ArrayRef<const Expr *> ReductionOps,
5485                                     ReductionOptionsTy Options) {
5486   if (!CGF.HaveInsertPoint())
5487     return;
5488 
5489   bool WithNowait = Options.WithNowait;
5490   bool SimpleReduction = Options.SimpleReduction;
5491 
5492   // Next code should be emitted for reduction:
5493   //
5494   // static kmp_critical_name lock = { 0 };
5495   //
5496   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5497   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5498   //  ...
5499   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5500   //  *(Type<n>-1*)rhs[<n>-1]);
5501   // }
5502   //
5503   // ...
5504   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5505   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5506   // RedList, reduce_func, &<lock>)) {
5507   // case 1:
5508   //  ...
5509   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5510   //  ...
5511   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5512   // break;
5513   // case 2:
5514   //  ...
5515   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5516   //  ...
5517   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5518   // break;
5519   // default:;
5520   // }
5521   //
5522   // if SimpleReduction is true, only the next code is generated:
5523   //  ...
5524   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5525   //  ...
5526 
5527   ASTContext &C = CGM.getContext();
5528 
5529   if (SimpleReduction) {
5530     CodeGenFunction::RunCleanupsScope Scope(CGF);
5531     auto IPriv = Privates.begin();
5532     auto ILHS = LHSExprs.begin();
5533     auto IRHS = RHSExprs.begin();
5534     for (const Expr *E : ReductionOps) {
5535       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5536                                   cast<DeclRefExpr>(*IRHS));
5537       ++IPriv;
5538       ++ILHS;
5539       ++IRHS;
5540     }
5541     return;
5542   }
5543 
5544   // 1. Build a list of reduction variables.
5545   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5546   auto Size = RHSExprs.size();
5547   for (const Expr *E : Privates) {
5548     if (E->getType()->isVariablyModifiedType())
5549       // Reserve place for array size.
5550       ++Size;
5551   }
5552   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5553   QualType ReductionArrayTy =
5554       C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
5555                              /*IndexTypeQuals=*/0);
5556   Address ReductionList =
5557       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5558   auto IPriv = Privates.begin();
5559   unsigned Idx = 0;
5560   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5561     Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5562     CGF.Builder.CreateStore(
5563         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5564             CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5565         Elem);
5566     if ((*IPriv)->getType()->isVariablyModifiedType()) {
5567       // Store array size.
5568       ++Idx;
5569       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5570       llvm::Value *Size = CGF.Builder.CreateIntCast(
5571           CGF.getVLASize(
5572                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5573               .NumElts,
5574           CGF.SizeTy, /*isSigned=*/false);
5575       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5576                               Elem);
5577     }
5578   }
5579 
5580   // 2. Emit reduce_func().
5581   llvm::Function *ReductionFn = emitReductionFunction(
5582       Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
5583       LHSExprs, RHSExprs, ReductionOps);
5584 
5585   // 3. Create static kmp_critical_name lock = { 0 };
5586   std::string Name = getName({"reduction"});
5587   llvm::Value *Lock = getCriticalRegionLock(Name);
5588 
5589   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5590   // RedList, reduce_func, &<lock>);
5591   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5592   llvm::Value *ThreadId = getThreadID(CGF, Loc);
5593   llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5594   llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5595       ReductionList.getPointer(), CGF.VoidPtrTy);
5596   llvm::Value *Args[] = {
5597       IdentTLoc,                             // ident_t *<loc>
5598       ThreadId,                              // i32 <gtid>
5599       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5600       ReductionArrayTySize,                  // size_type sizeof(RedList)
5601       RL,                                    // void *RedList
5602       ReductionFn, // void (*) (void *, void *) <reduce_func>
5603       Lock         // kmp_critical_name *&<lock>
5604   };
5605   llvm::Value *Res = CGF.EmitRuntimeCall(
5606       OMPBuilder.getOrCreateRuntimeFunction(
5607           CGM.getModule(),
5608           WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5609       Args);
5610 
5611   // 5. Build switch(res)
5612   llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5613   llvm::SwitchInst *SwInst =
5614       CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5615 
5616   // 6. Build case 1:
5617   //  ...
5618   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5619   //  ...
5620   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5621   // break;
5622   llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5623   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5624   CGF.EmitBlock(Case1BB);
5625 
5626   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5627   llvm::Value *EndArgs[] = {
5628       IdentTLoc, // ident_t *<loc>
5629       ThreadId,  // i32 <gtid>
5630       Lock       // kmp_critical_name *&<lock>
5631   };
5632   auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5633                        CodeGenFunction &CGF, PrePostActionTy &Action) {
5634     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5635     auto IPriv = Privates.begin();
5636     auto ILHS = LHSExprs.begin();
5637     auto IRHS = RHSExprs.begin();
5638     for (const Expr *E : ReductionOps) {
5639       RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5640                                      cast<DeclRefExpr>(*IRHS));
5641       ++IPriv;
5642       ++ILHS;
5643       ++IRHS;
5644     }
5645   };
5646   RegionCodeGenTy RCG(CodeGen);
5647   CommonActionTy Action(
5648       nullptr, llvm::None,
5649       OMPBuilder.getOrCreateRuntimeFunction(
5650           CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5651                                       : OMPRTL___kmpc_end_reduce),
5652       EndArgs);
5653   RCG.setAction(Action);
5654   RCG(CGF);
5655 
5656   CGF.EmitBranch(DefaultBB);
5657 
5658   // 7. Build case 2:
5659   //  ...
5660   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5661   //  ...
5662   // break;
5663   llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5664   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5665   CGF.EmitBlock(Case2BB);
5666 
5667   auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5668                              CodeGenFunction &CGF, PrePostActionTy &Action) {
5669     auto ILHS = LHSExprs.begin();
5670     auto IRHS = RHSExprs.begin();
5671     auto IPriv = Privates.begin();
5672     for (const Expr *E : ReductionOps) {
5673       const Expr *XExpr = nullptr;
5674       const Expr *EExpr = nullptr;
5675       const Expr *UpExpr = nullptr;
5676       BinaryOperatorKind BO = BO_Comma;
5677       if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5678         if (BO->getOpcode() == BO_Assign) {
5679           XExpr = BO->getLHS();
5680           UpExpr = BO->getRHS();
5681         }
5682       }
5683       // Try to emit update expression as a simple atomic.
5684       const Expr *RHSExpr = UpExpr;
5685       if (RHSExpr) {
5686         // Analyze RHS part of the whole expression.
5687         if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5688                 RHSExpr->IgnoreParenImpCasts())) {
5689           // If this is a conditional operator, analyze its condition for
5690           // min/max reduction operator.
5691           RHSExpr = ACO->getCond();
5692         }
5693         if (const auto *BORHS =
5694                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5695           EExpr = BORHS->getRHS();
5696           BO = BORHS->getOpcode();
5697         }
5698       }
5699       if (XExpr) {
5700         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5701         auto &&AtomicRedGen = [BO, VD,
5702                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
5703                                     const Expr *EExpr, const Expr *UpExpr) {
5704           LValue X = CGF.EmitLValue(XExpr);
5705           RValue E;
5706           if (EExpr)
5707             E = CGF.EmitAnyExpr(EExpr);
5708           CGF.EmitOMPAtomicSimpleUpdateExpr(
5709               X, E, BO, /*IsXLHSInRHSPart=*/true,
5710               llvm::AtomicOrdering::Monotonic, Loc,
5711               [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5712                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5713                 PrivateScope.addPrivate(
5714                     VD, [&CGF, VD, XRValue, Loc]() {
5715                       Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5716                       CGF.emitOMPSimpleStore(
5717                           CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5718                           VD->getType().getNonReferenceType(), Loc);
5719                       return LHSTemp;
5720                     });
5721                 (void)PrivateScope.Privatize();
5722                 return CGF.EmitAnyExpr(UpExpr);
5723               });
5724         };
5725         if ((*IPriv)->getType()->isArrayType()) {
5726           // Emit atomic reduction for array section.
5727           const auto *RHSVar =
5728               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5729           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5730                                     AtomicRedGen, XExpr, EExpr, UpExpr);
5731         } else {
5732           // Emit atomic reduction for array subscript or single variable.
5733           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5734         }
5735       } else {
5736         // Emit as a critical region.
5737         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5738                                            const Expr *, const Expr *) {
5739           CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5740           std::string Name = RT.getName({"atomic_reduction"});
5741           RT.emitCriticalRegion(
5742               CGF, Name,
5743               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5744                 Action.Enter(CGF);
5745                 emitReductionCombiner(CGF, E);
5746               },
5747               Loc);
5748         };
5749         if ((*IPriv)->getType()->isArrayType()) {
5750           const auto *LHSVar =
5751               cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5752           const auto *RHSVar =
5753               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5754           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5755                                     CritRedGen);
5756         } else {
5757           CritRedGen(CGF, nullptr, nullptr, nullptr);
5758         }
5759       }
5760       ++ILHS;
5761       ++IRHS;
5762       ++IPriv;
5763     }
5764   };
5765   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5766   if (!WithNowait) {
5767     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5768     llvm::Value *EndArgs[] = {
5769         IdentTLoc, // ident_t *<loc>
5770         ThreadId,  // i32 <gtid>
5771         Lock       // kmp_critical_name *&<lock>
5772     };
5773     CommonActionTy Action(nullptr, llvm::None,
5774                           OMPBuilder.getOrCreateRuntimeFunction(
5775                               CGM.getModule(), OMPRTL___kmpc_end_reduce),
5776                           EndArgs);
5777     AtomicRCG.setAction(Action);
5778     AtomicRCG(CGF);
5779   } else {
5780     AtomicRCG(CGF);
5781   }
5782 
5783   CGF.EmitBranch(DefaultBB);
5784   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5785 }
5786 
5787 /// Generates unique name for artificial threadprivate variables.
5788 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5789 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5790                                       const Expr *Ref) {
5791   SmallString<256> Buffer;
5792   llvm::raw_svector_ostream Out(Buffer);
5793   const clang::DeclRefExpr *DE;
5794   const VarDecl *D = ::getBaseDecl(Ref, DE);
5795   if (!D)
5796     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5797   D = D->getCanonicalDecl();
5798   std::string Name = CGM.getOpenMPRuntime().getName(
5799       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5800   Out << Prefix << Name << "_"
5801       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5802   return std::string(Out.str());
5803 }
5804 
5805 /// Emits reduction initializer function:
5806 /// \code
5807 /// void @.red_init(void* %arg, void* %orig) {
5808 /// %0 = bitcast void* %arg to <type>*
5809 /// store <type> <init>, <type>* %0
5810 /// ret void
5811 /// }
5812 /// \endcode
5813 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5814                                            SourceLocation Loc,
5815                                            ReductionCodeGen &RCG, unsigned N) {
5816   ASTContext &C = CGM.getContext();
5817   QualType VoidPtrTy = C.VoidPtrTy;
5818   VoidPtrTy.addRestrict();
5819   FunctionArgList Args;
5820   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5821                           ImplicitParamDecl::Other);
5822   ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5823                               ImplicitParamDecl::Other);
5824   Args.emplace_back(&Param);
5825   Args.emplace_back(&ParamOrig);
5826   const auto &FnInfo =
5827       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5828   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5829   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5830   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5831                                     Name, &CGM.getModule());
5832   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5833   Fn->setDoesNotRecurse();
5834   CodeGenFunction CGF(CGM);
5835   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5836   Address PrivateAddr = CGF.EmitLoadOfPointer(
5837       CGF.GetAddrOfLocalVar(&Param),
5838       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5839   llvm::Value *Size = nullptr;
5840   // If the size of the reduction item is non-constant, load it from global
5841   // threadprivate variable.
5842   if (RCG.getSizes(N).second) {
5843     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5844         CGF, CGM.getContext().getSizeType(),
5845         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5846     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5847                                 CGM.getContext().getSizeType(), Loc);
5848   }
5849   RCG.emitAggregateType(CGF, N, Size);
5850   LValue OrigLVal;
5851   // If initializer uses initializer from declare reduction construct, emit a
5852   // pointer to the address of the original reduction item (reuired by reduction
5853   // initializer)
5854   if (RCG.usesReductionInitializer(N)) {
5855     Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5856     SharedAddr = CGF.EmitLoadOfPointer(
5857         SharedAddr,
5858         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5859     OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
5860   } else {
5861     OrigLVal = CGF.MakeNaturalAlignAddrLValue(
5862         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
5863         CGM.getContext().VoidPtrTy);
5864   }
5865   // Emit the initializer:
5866   // %0 = bitcast void* %arg to <type>*
5867   // store <type> <init>, <type>* %0
5868   RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
5869                          [](CodeGenFunction &) { return false; });
5870   CGF.FinishFunction();
5871   return Fn;
5872 }
5873 
5874 /// Emits reduction combiner function:
5875 /// \code
5876 /// void @.red_comb(void* %arg0, void* %arg1) {
5877 /// %lhs = bitcast void* %arg0 to <type>*
5878 /// %rhs = bitcast void* %arg1 to <type>*
5879 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5880 /// store <type> %2, <type>* %lhs
5881 /// ret void
5882 /// }
5883 /// \endcode
5884 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5885                                            SourceLocation Loc,
5886                                            ReductionCodeGen &RCG, unsigned N,
5887                                            const Expr *ReductionOp,
5888                                            const Expr *LHS, const Expr *RHS,
5889                                            const Expr *PrivateRef) {
5890   ASTContext &C = CGM.getContext();
5891   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5892   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5893   FunctionArgList Args;
5894   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5895                                C.VoidPtrTy, ImplicitParamDecl::Other);
5896   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5897                             ImplicitParamDecl::Other);
5898   Args.emplace_back(&ParamInOut);
5899   Args.emplace_back(&ParamIn);
5900   const auto &FnInfo =
5901       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5902   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5903   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5904   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5905                                     Name, &CGM.getModule());
5906   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5907   Fn->setDoesNotRecurse();
5908   CodeGenFunction CGF(CGM);
5909   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5910   llvm::Value *Size = nullptr;
5911   // If the size of the reduction item is non-constant, load it from global
5912   // threadprivate variable.
5913   if (RCG.getSizes(N).second) {
5914     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5915         CGF, CGM.getContext().getSizeType(),
5916         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5917     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5918                                 CGM.getContext().getSizeType(), Loc);
5919   }
5920   RCG.emitAggregateType(CGF, N, Size);
5921   // Remap lhs and rhs variables to the addresses of the function arguments.
5922   // %lhs = bitcast void* %arg0 to <type>*
5923   // %rhs = bitcast void* %arg1 to <type>*
5924   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5925   PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
5926     // Pull out the pointer to the variable.
5927     Address PtrAddr = CGF.EmitLoadOfPointer(
5928         CGF.GetAddrOfLocalVar(&ParamInOut),
5929         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5930     return CGF.Builder.CreateElementBitCast(
5931         PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
5932   });
5933   PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
5934     // Pull out the pointer to the variable.
5935     Address PtrAddr = CGF.EmitLoadOfPointer(
5936         CGF.GetAddrOfLocalVar(&ParamIn),
5937         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5938     return CGF.Builder.CreateElementBitCast(
5939         PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
5940   });
5941   PrivateScope.Privatize();
5942   // Emit the combiner body:
5943   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5944   // store <type> %2, <type>* %lhs
5945   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5946       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5947       cast<DeclRefExpr>(RHS));
5948   CGF.FinishFunction();
5949   return Fn;
5950 }
5951 
5952 /// Emits reduction finalizer function:
5953 /// \code
5954 /// void @.red_fini(void* %arg) {
5955 /// %0 = bitcast void* %arg to <type>*
5956 /// <destroy>(<type>* %0)
5957 /// ret void
5958 /// }
5959 /// \endcode
5960 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5961                                            SourceLocation Loc,
5962                                            ReductionCodeGen &RCG, unsigned N) {
5963   if (!RCG.needCleanups(N))
5964     return nullptr;
5965   ASTContext &C = CGM.getContext();
5966   FunctionArgList Args;
5967   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5968                           ImplicitParamDecl::Other);
5969   Args.emplace_back(&Param);
5970   const auto &FnInfo =
5971       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5972   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5973   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5974   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5975                                     Name, &CGM.getModule());
5976   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5977   Fn->setDoesNotRecurse();
5978   CodeGenFunction CGF(CGM);
5979   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5980   Address PrivateAddr = CGF.EmitLoadOfPointer(
5981       CGF.GetAddrOfLocalVar(&Param),
5982       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5983   llvm::Value *Size = nullptr;
5984   // If the size of the reduction item is non-constant, load it from global
5985   // threadprivate variable.
5986   if (RCG.getSizes(N).second) {
5987     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5988         CGF, CGM.getContext().getSizeType(),
5989         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5990     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5991                                 CGM.getContext().getSizeType(), Loc);
5992   }
5993   RCG.emitAggregateType(CGF, N, Size);
5994   // Emit the finalizer body:
5995   // <destroy>(<type>* %0)
5996   RCG.emitCleanups(CGF, N, PrivateAddr);
5997   CGF.FinishFunction(Loc);
5998   return Fn;
5999 }
6000 
// Emits a local array of kmp_taskred_input_t descriptors, one per entry of
// Data.ReductionVars, fills each descriptor (shared/original item addresses,
// size, init/fini/comb routines, flags) and then calls either
// __kmpc_taskred_modifier_init (when Data.IsReductionWithTaskMod is set) or
// __kmpc_taskred_init. Returns the result of that runtime call, or nullptr
// when there is no insert point or no reduction variables.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  // The fields are added in the same order as in the struct sketched above.
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_taskred_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    // ElemLVal.reduce_size = <size in chars, converted to size_t>;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    // A null finalizer is stored when emitReduceFiniFunction returns null.
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6129 
6130 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6131                                             SourceLocation Loc,
6132                                             bool IsWorksharingReduction) {
6133   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6134   // is_ws, int num, void *data);
6135   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6136   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6137                                                 CGM.IntTy, /*isSigned=*/true);
6138   llvm::Value *Args[] = {IdentTLoc, GTid,
6139                          llvm::ConstantInt::get(CGM.IntTy,
6140                                                 IsWorksharingReduction ? 1 : 0,
6141                                                 /*isSigned=*/true)};
6142   (void)CGF.EmitRuntimeCall(
6143       OMPBuilder.getOrCreateRuntimeFunction(
6144           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6145       Args);
6146 }
6147 
6148 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6149                                               SourceLocation Loc,
6150                                               ReductionCodeGen &RCG,
6151                                               unsigned N) {
6152   auto Sizes = RCG.getSizes(N);
6153   // Emit threadprivate global variable if the type is non-constant
6154   // (Sizes.second = nullptr).
6155   if (Sizes.second) {
6156     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6157                                                      /*isSigned=*/false);
6158     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6159         CGF, CGM.getContext().getSizeType(),
6160         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6161     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6162   }
6163 }
6164 
6165 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6166                                               SourceLocation Loc,
6167                                               llvm::Value *ReductionsPtr,
6168                                               LValue SharedLVal) {
6169   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6170   // *d);
6171   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6172                                                    CGM.IntTy,
6173                                                    /*isSigned=*/true),
6174                          ReductionsPtr,
6175                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6176                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6177   return Address(
6178       CGF.EmitRuntimeCall(
6179           OMPBuilder.getOrCreateRuntimeFunction(
6180               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6181           Args),
6182       SharedLVal.getAlignment());
6183 }
6184 
6185 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6186                                        SourceLocation Loc) {
6187   if (!CGF.HaveInsertPoint())
6188     return;
6189 
6190   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6191     OMPBuilder.createTaskwait(CGF.Builder);
6192   } else {
6193     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6194     // global_tid);
6195     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6196     // Ignore return result until untied tasks are supported.
6197     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6198                             CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6199                         Args);
6200   }
6201 
6202   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6203     Region->emitUntiedSwitch(CGF);
6204 }
6205 
6206 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6207                                            OpenMPDirectiveKind InnerKind,
6208                                            const RegionCodeGenTy &CodeGen,
6209                                            bool HasCancel) {
6210   if (!CGF.HaveInsertPoint())
6211     return;
6212   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6213   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6214 }
6215 
namespace {
/// Cancellation kinds; the value produced by getCancellationKind() is passed
/// as the 'cncl_kind' argument of the __kmpc_cancel and
/// __kmpc_cancellationpoint runtime calls.
enum RTCancelKind {
  /// Initial value only; every path of getCancellationKind() overwrites it.
  CancelNoreq = 0,
  /// Used for OMPD_parallel.
  CancelParallel = 1,
  /// Used for OMPD_for.
  CancelLoop = 2,
  /// Used for OMPD_sections.
  CancelSections = 3,
  /// Used for OMPD_taskgroup.
  CancelTaskgroup = 4
};
} // anonymous namespace
6225 
6226 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6227   RTCancelKind CancelKind = CancelNoreq;
6228   if (CancelRegion == OMPD_parallel)
6229     CancelKind = CancelParallel;
6230   else if (CancelRegion == OMPD_for)
6231     CancelKind = CancelLoop;
6232   else if (CancelRegion == OMPD_sections)
6233     CancelKind = CancelSections;
6234   else {
6235     assert(CancelRegion == OMPD_taskgroup);
6236     CancelKind = CancelTaskgroup;
6237   }
6238   return CancelKind;
6239 }
6240 
6241 void CGOpenMPRuntime::emitCancellationPointCall(
6242     CodeGenFunction &CGF, SourceLocation Loc,
6243     OpenMPDirectiveKind CancelRegion) {
6244   if (!CGF.HaveInsertPoint())
6245     return;
6246   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6247   // global_tid, kmp_int32 cncl_kind);
6248   if (auto *OMPRegionInfo =
6249           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6250     // For 'cancellation point taskgroup', the task region info may not have a
6251     // cancel. This may instead happen in another adjacent task.
6252     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6253       llvm::Value *Args[] = {
6254           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6255           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6256       // Ignore return result until untied tasks are supported.
6257       llvm::Value *Result = CGF.EmitRuntimeCall(
6258           OMPBuilder.getOrCreateRuntimeFunction(
6259               CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6260           Args);
6261       // if (__kmpc_cancellationpoint()) {
6262       //   exit from construct;
6263       // }
6264       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6265       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6266       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6267       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6268       CGF.EmitBlock(ExitBB);
6269       // exit from construct;
6270       CodeGenFunction::JumpDest CancelDest =
6271           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6272       CGF.EmitBranchThroughCleanup(CancelDest);
6273       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6274     }
6275   }
6276 }
6277 
6278 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6279                                      const Expr *IfCond,
6280                                      OpenMPDirectiveKind CancelRegion) {
6281   if (!CGF.HaveInsertPoint())
6282     return;
6283   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6284   // kmp_int32 cncl_kind);
6285   auto &M = CGM.getModule();
6286   if (auto *OMPRegionInfo =
6287           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6288     auto &&ThenGen = [this, &M, Loc, CancelRegion,
6289                       OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6290       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6291       llvm::Value *Args[] = {
6292           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6293           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6294       // Ignore return result until untied tasks are supported.
6295       llvm::Value *Result = CGF.EmitRuntimeCall(
6296           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6297       // if (__kmpc_cancel()) {
6298       //   exit from construct;
6299       // }
6300       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6301       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6302       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6303       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6304       CGF.EmitBlock(ExitBB);
6305       // exit from construct;
6306       CodeGenFunction::JumpDest CancelDest =
6307           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6308       CGF.EmitBranchThroughCleanup(CancelDest);
6309       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6310     };
6311     if (IfCond) {
6312       emitIfClause(CGF, IfCond, ThenGen,
6313                    [](CodeGenFunction &, PrePostActionTy &) {});
6314     } else {
6315       RegionCodeGenTy ThenRCG(ThenGen);
6316       ThenRCG(CGF);
6317     }
6318   }
6319 }
6320 
6321 namespace {
6322 /// Cleanup action for uses_allocators support.
6323 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6324   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6325 
6326 public:
6327   OMPUsesAllocatorsActionTy(
6328       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6329       : Allocators(Allocators) {}
6330   void Enter(CodeGenFunction &CGF) override {
6331     if (!CGF.HaveInsertPoint())
6332       return;
6333     for (const auto &AllocatorData : Allocators) {
6334       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6335           CGF, AllocatorData.first, AllocatorData.second);
6336     }
6337   }
6338   void Exit(CodeGenFunction &CGF) override {
6339     if (!CGF.HaveInsertPoint())
6340       return;
6341     for (const auto &AllocatorData : Allocators) {
6342       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6343                                                         AllocatorData.first);
6344     }
6345   }
6346 };
6347 } // namespace
6348 
6349 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6350     const OMPExecutableDirective &D, StringRef ParentName,
6351     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6352     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6353   assert(!ParentName.empty() && "Invalid target region parent name!");
6354   HasEmittedTargetRegion = true;
6355   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6356   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6357     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6358       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6359       if (!D.AllocatorTraits)
6360         continue;
6361       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6362     }
6363   }
6364   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6365   CodeGen.setAction(UsesAllocatorAction);
6366   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6367                                    IsOffloadEntry, CodeGen);
6368 }
6369 
6370 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6371                                              const Expr *Allocator,
6372                                              const Expr *AllocatorTraits) {
6373   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6374   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6375   // Use default memspace handle.
6376   llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6377   llvm::Value *NumTraits = llvm::ConstantInt::get(
6378       CGF.IntTy, cast<ConstantArrayType>(
6379                      AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6380                      ->getSize()
6381                      .getLimitedValue());
6382   LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6383   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6384       AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
6385   AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6386                                            AllocatorTraitsLVal.getBaseInfo(),
6387                                            AllocatorTraitsLVal.getTBAAInfo());
6388   llvm::Value *Traits =
6389       CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());
6390 
6391   llvm::Value *AllocatorVal =
6392       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6393                               CGM.getModule(), OMPRTL___kmpc_init_allocator),
6394                           {ThreadId, MemSpaceHandle, NumTraits, Traits});
6395   // Store to allocator.
6396   CGF.EmitVarDecl(*cast<VarDecl>(
6397       cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6398   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6399   AllocatorVal =
6400       CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6401                                Allocator->getType(), Allocator->getExprLoc());
6402   CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6403 }
6404 
6405 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6406                                              const Expr *Allocator) {
6407   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6408   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6409   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6410   llvm::Value *AllocatorVal =
6411       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6412   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6413                                           CGF.getContext().VoidPtrTy,
6414                                           Allocator->getExprLoc());
6415   (void)CGF.EmitRuntimeCall(
6416       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6417                                             OMPRTL___kmpc_destroy_allocator),
6418       {ThreadId, AllocatorVal});
6419 }
6420 
// Outlines the 'target' captured statement of D into OutlinedFn. When
// IsOffloadEntry is set, it also creates the region ID (OutlinedFnID) and
// registers the entry with OffloadEntriesInfoManager; otherwise OutlinedFnID
// is left untouched.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    // The device and file IDs are printed in hexadecimal, the line in decimal.
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // The outlined function is generated with a separate CodeGenFunction whose
  // captured-statement info is set to the target region info for its scope.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also give
  // the outlined function weak (WeakAny), non-DSO-local linkage in case we are
  // emitting code for the device, because these functions will be entry points
  // to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    // On AMDGCN targets the entry point uses the AMDGPU kernel calling
    // convention.
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // On the host the ID is a weak, constant, zero-initialized i8 global named
    // after the entry function.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
6489 
6490 /// Checks if the expression is constant or does not have non-trivial function
6491 /// calls.
6492 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6493   // We can skip constant expressions.
6494   // We can skip expressions with trivial calls or simple expressions.
6495   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6496           !E->hasNonTrivialCall(Ctx)) &&
6497          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6498 }
6499 
6500 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6501                                                     const Stmt *Body) {
6502   const Stmt *Child = Body->IgnoreContainers();
6503   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6504     Child = nullptr;
6505     for (const Stmt *S : C->body()) {
6506       if (const auto *E = dyn_cast<Expr>(S)) {
6507         if (isTrivial(Ctx, E))
6508           continue;
6509       }
6510       // Some of the statements can be ignored.
6511       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6512           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6513         continue;
6514       // Analyze declarations.
6515       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6516         if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
6517               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6518                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6519                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6520                   isa<UsingDirectiveDecl>(D) ||
6521                   isa<OMPDeclareReductionDecl>(D) ||
6522                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6523                 return true;
6524               const auto *VD = dyn_cast<VarDecl>(D);
6525               if (!VD)
6526                 return false;
6527               return VD->isConstexpr() ||
6528                      ((VD->getType().isTrivialType(Ctx) ||
6529                        VD->getType()->isReferenceType()) &&
6530                       (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
6531             }))
6532           continue;
6533       }
6534       // Found multiple children - cannot get the one child only.
6535       if (Child)
6536         return nullptr;
6537       Child = S;
6538     }
6539     if (Child)
6540       Child = Child->IgnoreContainers();
6541   }
6542   return Child;
6543 }
6544 
6545 /// Emit the number of teams for a target directive.  Inspect the num_teams
6546 /// clause associated with a teams construct combined or closely nested
6547 /// with the target directive.
6548 ///
6549 /// Emit a team of size one for directives such as 'target parallel' that
6550 /// have no associated teams construct.
6551 ///
6552 /// Otherwise, return nullptr.
6553 static llvm::Value *
6554 emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
6555                                const OMPExecutableDirective &D) {
6556   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6557          "Clauses associated with the teams directive expected to be emitted "
6558          "only for the host!");
6559   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6560   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6561          "Expected target-based executable directive.");
6562   CGBuilderTy &Bld = CGF.Builder;
6563   switch (DirectiveKind) {
6564   case OMPD_target: {
6565     const auto *CS = D.getInnermostCapturedStmt();
6566     const auto *Body =
6567         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6568     const Stmt *ChildStmt =
6569         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6570     if (const auto *NestedDir =
6571             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6572       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6573         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6574           CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6575           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6576           const Expr *NumTeams =
6577               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6578           llvm::Value *NumTeamsVal =
6579               CGF.EmitScalarExpr(NumTeams,
6580                                  /*IgnoreResultAssign*/ true);
6581           return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6582                                    /*isSigned=*/true);
6583         }
6584         return Bld.getInt32(0);
6585       }
6586       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6587           isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
6588         return Bld.getInt32(1);
6589       return Bld.getInt32(0);
6590     }
6591     return nullptr;
6592   }
6593   case OMPD_target_teams:
6594   case OMPD_target_teams_distribute:
6595   case OMPD_target_teams_distribute_simd:
6596   case OMPD_target_teams_distribute_parallel_for:
6597   case OMPD_target_teams_distribute_parallel_for_simd: {
6598     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6599       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6600       const Expr *NumTeams =
6601           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6602       llvm::Value *NumTeamsVal =
6603           CGF.EmitScalarExpr(NumTeams,
6604                              /*IgnoreResultAssign*/ true);
6605       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6606                                /*isSigned=*/true);
6607     }
6608     return Bld.getInt32(0);
6609   }
6610   case OMPD_target_parallel:
6611   case OMPD_target_parallel_for:
6612   case OMPD_target_parallel_for_simd:
6613   case OMPD_target_simd:
6614     return Bld.getInt32(1);
6615   case OMPD_parallel:
6616   case OMPD_for:
6617   case OMPD_parallel_for:
6618   case OMPD_parallel_master:
6619   case OMPD_parallel_sections:
6620   case OMPD_for_simd:
6621   case OMPD_parallel_for_simd:
6622   case OMPD_cancel:
6623   case OMPD_cancellation_point:
6624   case OMPD_ordered:
6625   case OMPD_threadprivate:
6626   case OMPD_allocate:
6627   case OMPD_task:
6628   case OMPD_simd:
6629   case OMPD_sections:
6630   case OMPD_section:
6631   case OMPD_single:
6632   case OMPD_master:
6633   case OMPD_critical:
6634   case OMPD_taskyield:
6635   case OMPD_barrier:
6636   case OMPD_taskwait:
6637   case OMPD_taskgroup:
6638   case OMPD_atomic:
6639   case OMPD_flush:
6640   case OMPD_depobj:
6641   case OMPD_scan:
6642   case OMPD_teams:
6643   case OMPD_target_data:
6644   case OMPD_target_exit_data:
6645   case OMPD_target_enter_data:
6646   case OMPD_distribute:
6647   case OMPD_distribute_simd:
6648   case OMPD_distribute_parallel_for:
6649   case OMPD_distribute_parallel_for_simd:
6650   case OMPD_teams_distribute:
6651   case OMPD_teams_distribute_simd:
6652   case OMPD_teams_distribute_parallel_for:
6653   case OMPD_teams_distribute_parallel_for_simd:
6654   case OMPD_target_update:
6655   case OMPD_declare_simd:
6656   case OMPD_declare_variant:
6657   case OMPD_begin_declare_variant:
6658   case OMPD_end_declare_variant:
6659   case OMPD_declare_target:
6660   case OMPD_end_declare_target:
6661   case OMPD_declare_reduction:
6662   case OMPD_declare_mapper:
6663   case OMPD_taskloop:
6664   case OMPD_taskloop_simd:
6665   case OMPD_master_taskloop:
6666   case OMPD_master_taskloop_simd:
6667   case OMPD_parallel_master_taskloop:
6668   case OMPD_parallel_master_taskloop_simd:
6669   case OMPD_requires:
6670   case OMPD_unknown:
6671     break;
6672   default:
6673     break;
6674   }
6675   llvm_unreachable("Unexpected directive kind.");
6676 }
6677 
6678 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6679                                   llvm::Value *DefaultThreadLimitVal) {
6680   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6681       CGF.getContext(), CS->getCapturedStmt());
6682   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6683     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6684       llvm::Value *NumThreads = nullptr;
6685       llvm::Value *CondVal = nullptr;
6686       // Handle if clause. If if clause present, the number of threads is
6687       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6688       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6689         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6690         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6691         const OMPIfClause *IfClause = nullptr;
6692         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6693           if (C->getNameModifier() == OMPD_unknown ||
6694               C->getNameModifier() == OMPD_parallel) {
6695             IfClause = C;
6696             break;
6697           }
6698         }
6699         if (IfClause) {
6700           const Expr *Cond = IfClause->getCondition();
6701           bool Result;
6702           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6703             if (!Result)
6704               return CGF.Builder.getInt32(1);
6705           } else {
6706             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6707             if (const auto *PreInit =
6708                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6709               for (const auto *I : PreInit->decls()) {
6710                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6711                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6712                 } else {
6713                   CodeGenFunction::AutoVarEmission Emission =
6714                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6715                   CGF.EmitAutoVarCleanups(Emission);
6716                 }
6717               }
6718             }
6719             CondVal = CGF.EvaluateExprAsBool(Cond);
6720           }
6721         }
6722       }
6723       // Check the value of num_threads clause iff if clause was not specified
6724       // or is not evaluated to false.
6725       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6726         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6727         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6728         const auto *NumThreadsClause =
6729             Dir->getSingleClause<OMPNumThreadsClause>();
6730         CodeGenFunction::LexicalScope Scope(
6731             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6732         if (const auto *PreInit =
6733                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6734           for (const auto *I : PreInit->decls()) {
6735             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6736               CGF.EmitVarDecl(cast<VarDecl>(*I));
6737             } else {
6738               CodeGenFunction::AutoVarEmission Emission =
6739                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6740               CGF.EmitAutoVarCleanups(Emission);
6741             }
6742           }
6743         }
6744         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6745         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6746                                                /*isSigned=*/false);
6747         if (DefaultThreadLimitVal)
6748           NumThreads = CGF.Builder.CreateSelect(
6749               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6750               DefaultThreadLimitVal, NumThreads);
6751       } else {
6752         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6753                                            : CGF.Builder.getInt32(0);
6754       }
6755       // Process condition of the if clause.
6756       if (CondVal) {
6757         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6758                                               CGF.Builder.getInt32(1));
6759       }
6760       return NumThreads;
6761     }
6762     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6763       return CGF.Builder.getInt32(1);
6764     return DefaultThreadLimitVal;
6765   }
6766   return DefaultThreadLimitVal ? DefaultThreadLimitVal
6767                                : CGF.Builder.getInt32(0);
6768 }
6769 
6770 /// Emit the number of threads for a target directive.  Inspect the
6771 /// thread_limit clause associated with a teams construct combined or closely
6772 /// nested with the target directive.
6773 ///
6774 /// Emit the num_threads clause for directives such as 'target parallel' that
6775 /// have no associated teams construct.
6776 ///
6777 /// Otherwise, return nullptr.
6778 static llvm::Value *
6779 emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
6780                                  const OMPExecutableDirective &D) {
6781   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6782          "Clauses associated with the teams directive expected to be emitted "
6783          "only for the host!");
6784   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6785   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6786          "Expected target-based executable directive.");
6787   CGBuilderTy &Bld = CGF.Builder;
6788   llvm::Value *ThreadLimitVal = nullptr;
6789   llvm::Value *NumThreadsVal = nullptr;
6790   switch (DirectiveKind) {
6791   case OMPD_target: {
6792     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6793     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6794       return NumThreads;
6795     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6796         CGF.getContext(), CS->getCapturedStmt());
6797     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6798       if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
6799         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6800         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6801         const auto *ThreadLimitClause =
6802             Dir->getSingleClause<OMPThreadLimitClause>();
6803         CodeGenFunction::LexicalScope Scope(
6804             CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6805         if (const auto *PreInit =
6806                 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6807           for (const auto *I : PreInit->decls()) {
6808             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6809               CGF.EmitVarDecl(cast<VarDecl>(*I));
6810             } else {
6811               CodeGenFunction::AutoVarEmission Emission =
6812                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6813               CGF.EmitAutoVarCleanups(Emission);
6814             }
6815           }
6816         }
6817         llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6818             ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6819         ThreadLimitVal =
6820             Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6821       }
6822       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6823           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6824         CS = Dir->getInnermostCapturedStmt();
6825         const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6826             CGF.getContext(), CS->getCapturedStmt());
6827         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6828       }
6829       if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
6830           !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
6831         CS = Dir->getInnermostCapturedStmt();
6832         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6833           return NumThreads;
6834       }
6835       if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6836         return Bld.getInt32(1);
6837     }
6838     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6839   }
6840   case OMPD_target_teams: {
6841     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6842       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6843       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6844       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6845           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6846       ThreadLimitVal =
6847           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6848     }
6849     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6850     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6851       return NumThreads;
6852     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6853         CGF.getContext(), CS->getCapturedStmt());
6854     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6855       if (Dir->getDirectiveKind() == OMPD_distribute) {
6856         CS = Dir->getInnermostCapturedStmt();
6857         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6858           return NumThreads;
6859       }
6860     }
6861     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6862   }
6863   case OMPD_target_teams_distribute:
6864     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6865       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6866       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6867       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6868           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6869       ThreadLimitVal =
6870           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6871     }
6872     return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
6873   case OMPD_target_parallel:
6874   case OMPD_target_parallel_for:
6875   case OMPD_target_parallel_for_simd:
6876   case OMPD_target_teams_distribute_parallel_for:
6877   case OMPD_target_teams_distribute_parallel_for_simd: {
6878     llvm::Value *CondVal = nullptr;
6879     // Handle if clause. If if clause present, the number of threads is
6880     // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6881     if (D.hasClausesOfKind<OMPIfClause>()) {
6882       const OMPIfClause *IfClause = nullptr;
6883       for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6884         if (C->getNameModifier() == OMPD_unknown ||
6885             C->getNameModifier() == OMPD_parallel) {
6886           IfClause = C;
6887           break;
6888         }
6889       }
6890       if (IfClause) {
6891         const Expr *Cond = IfClause->getCondition();
6892         bool Result;
6893         if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6894           if (!Result)
6895             return Bld.getInt32(1);
6896         } else {
6897           CodeGenFunction::RunCleanupsScope Scope(CGF);
6898           CondVal = CGF.EvaluateExprAsBool(Cond);
6899         }
6900       }
6901     }
6902     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6903       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6904       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6905       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6906           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6907       ThreadLimitVal =
6908           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6909     }
6910     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6911       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6912       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6913       llvm::Value *NumThreads = CGF.EmitScalarExpr(
6914           NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
6915       NumThreadsVal =
6916           Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
6917       ThreadLimitVal = ThreadLimitVal
6918                            ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
6919                                                                 ThreadLimitVal),
6920                                               NumThreadsVal, ThreadLimitVal)
6921                            : NumThreadsVal;
6922     }
6923     if (!ThreadLimitVal)
6924       ThreadLimitVal = Bld.getInt32(0);
6925     if (CondVal)
6926       return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
6927     return ThreadLimitVal;
6928   }
6929   case OMPD_target_teams_distribute_simd:
6930   case OMPD_target_simd:
6931     return Bld.getInt32(1);
6932   case OMPD_parallel:
6933   case OMPD_for:
6934   case OMPD_parallel_for:
6935   case OMPD_parallel_master:
6936   case OMPD_parallel_sections:
6937   case OMPD_for_simd:
6938   case OMPD_parallel_for_simd:
6939   case OMPD_cancel:
6940   case OMPD_cancellation_point:
6941   case OMPD_ordered:
6942   case OMPD_threadprivate:
6943   case OMPD_allocate:
6944   case OMPD_task:
6945   case OMPD_simd:
6946   case OMPD_sections:
6947   case OMPD_section:
6948   case OMPD_single:
6949   case OMPD_master:
6950   case OMPD_critical:
6951   case OMPD_taskyield:
6952   case OMPD_barrier:
6953   case OMPD_taskwait:
6954   case OMPD_taskgroup:
6955   case OMPD_atomic:
6956   case OMPD_flush:
6957   case OMPD_depobj:
6958   case OMPD_scan:
6959   case OMPD_teams:
6960   case OMPD_target_data:
6961   case OMPD_target_exit_data:
6962   case OMPD_target_enter_data:
6963   case OMPD_distribute:
6964   case OMPD_distribute_simd:
6965   case OMPD_distribute_parallel_for:
6966   case OMPD_distribute_parallel_for_simd:
6967   case OMPD_teams_distribute:
6968   case OMPD_teams_distribute_simd:
6969   case OMPD_teams_distribute_parallel_for:
6970   case OMPD_teams_distribute_parallel_for_simd:
6971   case OMPD_target_update:
6972   case OMPD_declare_simd:
6973   case OMPD_declare_variant:
6974   case OMPD_begin_declare_variant:
6975   case OMPD_end_declare_variant:
6976   case OMPD_declare_target:
6977   case OMPD_end_declare_target:
6978   case OMPD_declare_reduction:
6979   case OMPD_declare_mapper:
6980   case OMPD_taskloop:
6981   case OMPD_taskloop_simd:
6982   case OMPD_master_taskloop:
6983   case OMPD_master_taskloop_simd:
6984   case OMPD_parallel_master_taskloop:
6985   case OMPD_parallel_master_taskloop_simd:
6986   case OMPD_requires:
6987   case OMPD_unknown:
6988     break;
6989   default:
6990     break;
6991   }
6992   llvm_unreachable("Unsupported directive kind.");
6993 }
6994 
6995 namespace {
6996 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6997 
6998 // Utility to handle information from clauses associated with a given
6999 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7000 // It provides a convenient interface to obtain the information and generate
7001 // code for that information.
7002 class MappableExprsHandler {
7003 public:
7004   /// Values for bit flags used to specify the mapping type for
7005   /// offloading.
7006   enum OpenMPOffloadMappingFlags : uint64_t {
7007     /// No flags
7008     OMP_MAP_NONE = 0x0,
7009     /// Allocate memory on the device and move data from host to device.
7010     OMP_MAP_TO = 0x01,
7011     /// Allocate memory on the device and move data from device to host.
7012     OMP_MAP_FROM = 0x02,
7013     /// Always perform the requested mapping action on the element, even
7014     /// if it was already mapped before.
7015     OMP_MAP_ALWAYS = 0x04,
7016     /// Delete the element from the device environment, ignoring the
7017     /// current reference count associated with the element.
7018     OMP_MAP_DELETE = 0x08,
7019     /// The element being mapped is a pointer-pointee pair; both the
7020     /// pointer and the pointee should be mapped.
7021     OMP_MAP_PTR_AND_OBJ = 0x10,
7022     /// This flags signals that the base address of an entry should be
7023     /// passed to the target kernel as an argument.
7024     OMP_MAP_TARGET_PARAM = 0x20,
7025     /// Signal that the runtime library has to return the device pointer
7026     /// in the current position for the data being mapped. Used when we have the
7027     /// use_device_ptr or use_device_addr clause.
7028     OMP_MAP_RETURN_PARAM = 0x40,
7029     /// This flag signals that the reference being passed is a pointer to
7030     /// private data.
7031     OMP_MAP_PRIVATE = 0x80,
7032     /// Pass the element to the device by value.
7033     OMP_MAP_LITERAL = 0x100,
7034     /// Implicit map
7035     OMP_MAP_IMPLICIT = 0x200,
7036     /// Close is a hint to the runtime to allocate memory close to
7037     /// the target device.
7038     OMP_MAP_CLOSE = 0x400,
7039     /// 0x800 is reserved for compatibility with XLC.
7040     /// Produce a runtime error if the data is not already allocated.
7041     OMP_MAP_PRESENT = 0x1000,
7042     /// Signal that the runtime library should use args as an array of
7043     /// descriptor_dim pointers and use args_size as dims. Used when we have
7044     /// non-contiguous list items in target update directive
7045     OMP_MAP_NON_CONTIG = 0x100000000000,
7046     /// The 16 MSBs of the flags indicate whether the entry is member of some
7047     /// struct/class.
7048     OMP_MAP_MEMBER_OF = 0xffff000000000000,
7049     LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
7050   };
7051 
7052   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7053   static unsigned getFlagMemberOffset() {
7054     unsigned Offset = 0;
7055     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7056          Remain = Remain >> 1)
7057       Offset++;
7058     return Offset;
7059   }
7060 
7061   /// Class that holds debugging information for a data mapping to be passed to
7062   /// the runtime library.
7063   class MappingExprInfo {
7064     /// The variable declaration used for the data mapping.
7065     const ValueDecl *MapDecl = nullptr;
7066     /// The original expression used in the map clause, or null if there is
7067     /// none.
7068     const Expr *MapExpr = nullptr;
7069 
7070   public:
7071     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7072         : MapDecl(MapDecl), MapExpr(MapExpr) {}
7073 
7074     const ValueDecl *getMapDecl() const { return MapDecl; }
7075     const Expr *getMapExpr() const { return MapExpr; }
7076   };
7077 
7078   /// Class that associates information with a base pointer to be passed to the
7079   /// runtime library.
7080   class BasePointerInfo {
7081     /// The base pointer.
7082     llvm::Value *Ptr = nullptr;
7083     /// The base declaration that refers to this device pointer, or null if
7084     /// there is none.
7085     const ValueDecl *DevPtrDecl = nullptr;
7086 
7087   public:
7088     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7089         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7090     llvm::Value *operator*() const { return Ptr; }
7091     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7092     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7093   };
7094 
7095   using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
7096   using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7097   using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7098   using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7099   using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
7100   using MapDimArrayTy = SmallVector<uint64_t, 4>;
7101   using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7102 
7103   /// This structure contains combined information generated for mappable
7104   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7105   /// mappers, and non-contiguous information.
7106   struct MapCombinedInfoTy {
7107     struct StructNonContiguousInfo {
7108       bool IsNonContiguous = false;
7109       MapDimArrayTy Dims;
7110       MapNonContiguousArrayTy Offsets;
7111       MapNonContiguousArrayTy Counts;
7112       MapNonContiguousArrayTy Strides;
7113     };
7114     MapExprsArrayTy Exprs;
7115     MapBaseValuesArrayTy BasePointers;
7116     MapValuesArrayTy Pointers;
7117     MapValuesArrayTy Sizes;
7118     MapFlagsArrayTy Types;
7119     MapMappersArrayTy Mappers;
7120     StructNonContiguousInfo NonContigInfo;
7121 
7122     /// Append arrays in \a CurInfo.
7123     void append(MapCombinedInfoTy &CurInfo) {
7124       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7125       BasePointers.append(CurInfo.BasePointers.begin(),
7126                           CurInfo.BasePointers.end());
7127       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7128       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7129       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7130       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7131       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7132                                  CurInfo.NonContigInfo.Dims.end());
7133       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7134                                     CurInfo.NonContigInfo.Offsets.end());
7135       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7136                                    CurInfo.NonContigInfo.Counts.end());
7137       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7138                                     CurInfo.NonContigInfo.Strides.end());
7139     }
7140   };
7141 
7142   /// Map between a struct and the its lowest & highest elements which have been
7143   /// mapped.
7144   /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
7145   ///                    HE(FieldIndex, Pointer)}
7146   struct StructRangeInfoTy {
7147     std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
7148         0, Address::invalid()};
7149     std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
7150         0, Address::invalid()};
7151     Address Base = Address::invalid();
7152     bool IsArraySection = false;
7153   };
7154 
7155 private:
7156   /// Kind that defines how a device pointer has to be returned.
7157   struct MapInfo {
7158     OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
7159     OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
7160     ArrayRef<OpenMPMapModifierKind> MapModifiers;
7161     ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
7162     bool ReturnDevicePointer = false;
7163     bool IsImplicit = false;
7164     const ValueDecl *Mapper = nullptr;
7165     const Expr *VarRef = nullptr;
7166     bool ForDeviceAddr = false;
7167 
7168     MapInfo() = default;
7169     MapInfo(
7170         OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7171         OpenMPMapClauseKind MapType,
7172         ArrayRef<OpenMPMapModifierKind> MapModifiers,
7173         ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7174         bool ReturnDevicePointer, bool IsImplicit,
7175         const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
7176         bool ForDeviceAddr = false)
7177         : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
7178           MotionModifiers(MotionModifiers),
7179           ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
7180           Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
7181   };
7182 
7183   /// If use_device_ptr or use_device_addr is used on a decl which is a struct
7184   /// member and there is no map information about it, then emission of that
7185   /// entry is deferred until the whole struct has been processed.
7186   struct DeferredDevicePtrEntryTy {
7187     const Expr *IE = nullptr;
7188     const ValueDecl *VD = nullptr;
7189     bool ForDeviceAddr = false;
7190 
7191     DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
7192                              bool ForDeviceAddr)
7193         : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
7194   };
7195 
  /// The directive from which the mappable clauses were extracted. It is
  /// either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// The bool value is true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
7216 
7217   llvm::Value *getExprTypeSize(const Expr *E) const {
7218     QualType ExprTy = E->getType().getCanonicalType();
7219 
7220     // Calculate the size for array shaping expression.
7221     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7222       llvm::Value *Size =
7223           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7224       for (const Expr *SE : OAE->getDimensions()) {
7225         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7226         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7227                                       CGF.getContext().getSizeType(),
7228                                       SE->getExprLoc());
7229         Size = CGF.Builder.CreateNUWMul(Size, Sz);
7230       }
7231       return Size;
7232     }
7233 
7234     // Reference types are ignored for mapping purposes.
7235     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7236       ExprTy = RefTy->getPointeeType().getCanonicalType();
7237 
7238     // Given that an array section is considered a built-in type, we need to
7239     // do the calculation based on the length of the section instead of relying
7240     // on CGF.getTypeSize(E->getType()).
7241     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7242       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7243                             OAE->getBase()->IgnoreParenImpCasts())
7244                             .getCanonicalType();
7245 
7246       // If there is no length associated with the expression and lower bound is
7247       // not specified too, that means we are using the whole length of the
7248       // base.
7249       if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7250           !OAE->getLowerBound())
7251         return CGF.getTypeSize(BaseTy);
7252 
7253       llvm::Value *ElemSize;
7254       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7255         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7256       } else {
7257         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7258         assert(ATy && "Expecting array type if not a pointer type.");
7259         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7260       }
7261 
7262       // If we don't have a length at this point, that is because we have an
7263       // array section with a single element.
7264       if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7265         return ElemSize;
7266 
7267       if (const Expr *LenExpr = OAE->getLength()) {
7268         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7269         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7270                                              CGF.getContext().getSizeType(),
7271                                              LenExpr->getExprLoc());
7272         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7273       }
7274       assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7275              OAE->getLowerBound() && "expected array_section[lb:].");
7276       // Size = sizetype - lb * elemtype;
7277       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7278       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7279       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7280                                        CGF.getContext().getSizeType(),
7281                                        OAE->getLowerBound()->getExprLoc());
7282       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7283       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7284       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7285       LengthVal = CGF.Builder.CreateSelect(
7286           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7287       return LengthVal;
7288     }
7289     return CGF.getTypeSize(ExprTy);
7290   }
7291 
7292   /// Return the corresponding bits for a given map clause modifier. Add
7293   /// a flag marking the map as a pointer if requested. Add a flag marking the
7294   /// map as the first one of a series of maps that relate to the same map
7295   /// expression.
7296   OpenMPOffloadMappingFlags getMapTypeBits(
7297       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7298       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7299       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7300     OpenMPOffloadMappingFlags Bits =
7301         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7302     switch (MapType) {
7303     case OMPC_MAP_alloc:
7304     case OMPC_MAP_release:
7305       // alloc and release is the default behavior in the runtime library,  i.e.
7306       // if we don't pass any bits alloc/release that is what the runtime is
7307       // going to do. Therefore, we don't need to signal anything for these two
7308       // type modifiers.
7309       break;
7310     case OMPC_MAP_to:
7311       Bits |= OMP_MAP_TO;
7312       break;
7313     case OMPC_MAP_from:
7314       Bits |= OMP_MAP_FROM;
7315       break;
7316     case OMPC_MAP_tofrom:
7317       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7318       break;
7319     case OMPC_MAP_delete:
7320       Bits |= OMP_MAP_DELETE;
7321       break;
7322     case OMPC_MAP_unknown:
7323       llvm_unreachable("Unexpected map type!");
7324     }
7325     if (AddPtrFlag)
7326       Bits |= OMP_MAP_PTR_AND_OBJ;
7327     if (AddIsTargetParamFlag)
7328       Bits |= OMP_MAP_TARGET_PARAM;
7329     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7330         != MapModifiers.end())
7331       Bits |= OMP_MAP_ALWAYS;
7332     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7333         != MapModifiers.end())
7334       Bits |= OMP_MAP_CLOSE;
7335     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present)
7336         != MapModifiers.end())
7337       Bits |= OMP_MAP_PRESENT;
7338     if (llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present)
7339         != MotionModifiers.end())
7340       Bits |= OMP_MAP_PRESENT;
7341     if (IsNonContiguous)
7342       Bits |= OMP_MAP_NON_CONTIG;
7343     return Bits;
7344   }
7345 
7346   /// Return true if the provided expression is a final array section. A
7347   /// final array section, is one whose length can't be proved to be one.
7348   bool isFinalArraySectionExpression(const Expr *E) const {
7349     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7350 
7351     // It is not an array section and therefore not a unity-size one.
7352     if (!OASE)
7353       return false;
7354 
7355     // An array section with no colon always refer to a single element.
7356     if (OASE->getColonLocFirst().isInvalid())
7357       return false;
7358 
7359     const Expr *Length = OASE->getLength();
7360 
7361     // If we don't have a length we have to check if the array has size 1
7362     // for this dimension. Also, we should always expect a length if the
7363     // base type is pointer.
7364     if (!Length) {
7365       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7366                              OASE->getBase()->IgnoreParenImpCasts())
7367                              .getCanonicalType();
7368       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7369         return ATy->getSize().getSExtValue() != 1;
7370       // If we don't have a constant dimension length, we have to consider
7371       // the current section as having any size, so it is not necessarily
7372       // unitary. If it happen to be unity size, that's user fault.
7373       return true;
7374     }
7375 
7376     // Check if the length evaluates to 1.
7377     Expr::EvalResult Result;
7378     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7379       return true; // Can have more that size 1.
7380 
7381     llvm::APSInt ConstLength = Result.Val.getInt();
7382     return ConstLength.getSExtValue() != 1;
7383   }
7384 
7385   /// Generate the base pointers, section pointers, sizes, map type bits, and
7386   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7387   /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
7390   void generateInfoForComponentList(
7391       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7392       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7393       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7394       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7395       bool IsFirstComponentList, bool IsImplicit,
7396       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7397       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7398       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7399           OverlappedElements = llvm::None) const {
7400     // The following summarizes what has to be generated for each map and the
7401     // types below. The generated information is expressed in this order:
7402     // base pointer, section pointer, size, flags
7403     // (to add to the ones that come from the map type and modifier).
7404     //
7405     // double d;
7406     // int i[100];
7407     // float *p;
7408     //
7409     // struct S1 {
7410     //   int i;
7411     //   float f[50];
7412     // }
7413     // struct S2 {
7414     //   int i;
7415     //   float f[50];
7416     //   S1 s;
7417     //   double *p;
7418     //   struct S2 *ps;
7419     // }
7420     // S2 s;
7421     // S2 *ps;
7422     //
7423     // map(d)
7424     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7425     //
7426     // map(i)
7427     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7428     //
7429     // map(i[1:23])
7430     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7431     //
7432     // map(p)
7433     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7434     //
7435     // map(p[1:24])
7436     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7437     // in unified shared memory mode or for local pointers
7438     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7439     //
7440     // map(s)
7441     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7442     //
7443     // map(s.i)
7444     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7445     //
7446     // map(s.s.f)
7447     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7448     //
7449     // map(s.p)
7450     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7451     //
7452     // map(to: s.p[:22])
7453     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7454     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7455     // &(s.p), &(s.p[0]), 22*sizeof(double),
7456     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7457     // (*) alloc space for struct members, only this is a target parameter
7458     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7459     //      optimizes this entry out, same in the examples below)
7460     // (***) map the pointee (map: to)
7461     //
7462     // map(s.ps)
7463     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7464     //
7465     // map(from: s.ps->s.i)
7466     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7467     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7468     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7469     //
7470     // map(to: s.ps->ps)
7471     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7472     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7473     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7474     //
7475     // map(s.ps->ps->ps)
7476     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7477     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7478     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7479     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7480     //
7481     // map(to: s.ps->ps->s.f[:22])
7482     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7483     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7484     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7485     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7486     //
7487     // map(ps)
7488     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7489     //
7490     // map(ps->i)
7491     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7492     //
7493     // map(ps->s.f)
7494     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7495     //
7496     // map(from: ps->p)
7497     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7498     //
7499     // map(to: ps->p[:22])
7500     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7501     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7502     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7503     //
7504     // map(ps->ps)
7505     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7506     //
7507     // map(from: ps->ps->s.i)
7508     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7509     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7510     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7511     //
7512     // map(from: ps->ps->ps)
7513     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7514     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7515     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7516     //
7517     // map(ps->ps->ps->ps)
7518     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7519     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7520     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7521     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7522     //
7523     // map(to: ps->ps->ps->s.f[:22])
7524     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7525     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7526     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7527     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7528     //
7529     // map(to: s.f[:22]) map(from: s.p[:33])
7530     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
7532     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7533     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7534     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7535     // (*) allocate contiguous space needed to fit all mapped members even if
7536     //     we allocate space for members not mapped (in this example,
7537     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7538     //     them as well because they fall between &s.f[0] and &s.p)
7539     //
7540     // map(from: s.f[:22]) map(to: ps->p[:33])
7541     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7542     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7543     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7544     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7545     // (*) the struct this entry pertains to is the 2nd element in the list of
7546     //     arguments, hence MEMBER_OF(2)
7547     //
7548     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7549     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7550     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7551     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7552     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7553     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7554     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7555     // (*) the struct this entry pertains to is the 4th element in the list
7556     //     of arguments, hence MEMBER_OF(4)
7557 
7558     // Track if the map information being generated is the first for a capture.
7559     bool IsCaptureFirstInfo = IsFirstComponentList;
7560     // When the variable is on a declare target link or in a to clause with
7561     // unified memory, a reference is needed to hold the host/device address
7562     // of the variable.
7563     bool RequiresReference = false;
7564 
7565     // Scan the components from the base to the complete expression.
7566     auto CI = Components.rbegin();
7567     auto CE = Components.rend();
7568     auto I = CI;
7569 
7570     // Track if the map information being generated is the first for a list of
7571     // components.
7572     bool IsExpressionFirstInfo = true;
7573     bool FirstPointerInComplexData = false;
7574     Address BP = Address::invalid();
7575     const Expr *AssocExpr = I->getAssociatedExpression();
7576     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7577     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7578     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7579 
7580     if (isa<MemberExpr>(AssocExpr)) {
7581       // The base is the 'this' pointer. The content of the pointer is going
7582       // to be the base of the field being mapped.
7583       BP = CGF.LoadCXXThisAddress();
7584     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7585                (OASE &&
7586                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7587       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7588     } else if (OAShE &&
7589                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7590       BP = Address(
7591           CGF.EmitScalarExpr(OAShE->getBase()),
7592           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7593     } else {
7594       // The base is the reference to the variable.
7595       // BP = &Var.
7596       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7597       if (const auto *VD =
7598               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7599         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7600                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7601           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7602               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7603                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7604             RequiresReference = true;
7605             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7606           }
7607         }
7608       }
7609 
7610       // If the variable is a pointer and is being dereferenced (i.e. is not
7611       // the last component), the base has to be the pointer itself, not its
7612       // reference. References are ignored for mapping purposes.
7613       QualType Ty =
7614           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7615       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7616         // No need to generate individual map information for the pointer, it
7617         // can be associated with the combined storage if shared memory mode is
7618         // active or the base declaration is not global variable.
7619         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7620         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7621             !VD || VD->hasLocalStorage())
7622           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7623         else
7624           FirstPointerInComplexData = true;
7625         ++I;
7626       }
7627     }
7628 
7629     // Track whether a component of the list should be marked as MEMBER_OF some
7630     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7631     // in a component list should be marked as MEMBER_OF, all subsequent entries
7632     // do not belong to the base struct. E.g.
7633     // struct S2 s;
7634     // s.ps->ps->ps->f[:]
7635     //   (1) (2) (3) (4)
7636     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7637     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7638     // is the pointee of ps(2) which is not member of struct s, so it should not
7639     // be marked as such (it is still PTR_AND_OBJ).
7640     // The variable is initialized to false so that PTR_AND_OBJ entries which
7641     // are not struct members are not considered (e.g. array of pointers to
7642     // data).
7643     bool ShouldBeMemberOf = false;
7644 
7645     // Variable keeping track of whether or not we have encountered a component
7646     // in the component list which is a member expression. Useful when we have a
7647     // pointer or a final array section, in which case it is the previous
7648     // component in the list which tells us whether we have a member expression.
7649     // E.g. X.f[:]
7650     // While processing the final array section "[:]" it is "f" which tells us
7651     // whether we are dealing with a member of a declared struct.
7652     const MemberExpr *EncounteredME = nullptr;
7653 
    // Track the total number of dimensions. Start from one for the dummy
    // dimension.
7656     uint64_t DimSize = 1;
7657 
7658     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7659 
7660     for (; I != CE; ++I) {
7661       // If the current component is member of a struct (parent struct) mark it.
7662       if (!EncounteredME) {
7663         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7664         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7665         // as MEMBER_OF the parent struct.
7666         if (EncounteredME) {
7667           ShouldBeMemberOf = true;
7668           // Do not emit as complex pointer if this is actually not array-like
7669           // expression.
7670           if (FirstPointerInComplexData) {
7671             QualType Ty = std::prev(I)
7672                               ->getAssociatedDeclaration()
7673                               ->getType()
7674                               .getNonReferenceType();
7675             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7676             FirstPointerInComplexData = false;
7677           }
7678         }
7679       }
7680 
7681       auto Next = std::next(I);
7682 
7683       // We need to generate the addresses and sizes if this is the last
7684       // component, if the component is a pointer or if it is an array section
7685       // whose length can't be proved to be one. If this is a pointer, it
7686       // becomes the base address for the following components.
7687 
7688       // A final array section, is one whose length can't be proved to be one.
7689       // If the map item is non-contiguous then we don't treat any array section
7690       // as final array section.
7691       bool IsFinalArraySection =
7692           !IsNonContiguous &&
7693           isFinalArraySectionExpression(I->getAssociatedExpression());
7694 
7695       // If we have a declaration for the mapping use that, otherwise use
7696       // the base declaration of the map clause.
7697       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7698                                      ? I->getAssociatedDeclaration()
7699                                      : BaseDecl;
7700 
7701       // Get information on whether the element is a pointer. Have to do a
7702       // special treatment for array sections given that they are built-in
7703       // types.
7704       const auto *OASE =
7705           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7706       const auto *OAShE =
7707           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7708       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7709       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7710       bool IsPointer =
7711           OAShE ||
7712           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7713                        .getCanonicalType()
7714                        ->isAnyPointerType()) ||
7715           I->getAssociatedExpression()->getType()->isAnyPointerType();
7716       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
7717 
7718       if (OASE)
7719         ++DimSize;
7720 
7721       if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
7722         // If this is not the last component, we expect the pointer to be
7723         // associated with an array expression or member expression.
7724         assert((Next == CE ||
7725                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7726                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7727                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7728                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7729                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7730                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7731                "Unexpected expression");
7732 
7733         Address LB = Address::invalid();
7734         if (OAShE) {
7735           LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
7736                        CGF.getContext().getTypeAlignInChars(
7737                            OAShE->getBase()->getType()));
7738         } else {
7739           LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7740                    .getAddress(CGF);
7741         }
7742 
7743         // If this component is a pointer inside the base struct then we don't
7744         // need to create any entry for it - it will be combined with the object
7745         // it is pointing to into a single PTR_AND_OBJ entry.
7746         bool IsMemberPointerOrAddr =
7747             (IsPointer || ForDeviceAddr) && EncounteredME &&
7748             (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7749              EncounteredME);
7750         if (!OverlappedElements.empty()) {
7751           // Handle base element with the info for overlapped elements.
7752           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7753           assert(Next == CE &&
7754                  "Expected last element for the overlapped elements.");
7755           assert(!IsPointer &&
7756                  "Unexpected base element with the pointer type.");
7757           // Mark the whole struct as the struct that requires allocation on the
7758           // device.
7759           PartialStruct.LowestElem = {0, LB};
7760           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7761               I->getAssociatedExpression()->getType());
7762           Address HB = CGF.Builder.CreateConstGEP(
7763               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
7764                                                               CGF.VoidPtrTy),
7765               TypeSize.getQuantity() - 1);
7766           PartialStruct.HighestElem = {
7767               std::numeric_limits<decltype(
7768                   PartialStruct.HighestElem.first)>::max(),
7769               HB};
7770           PartialStruct.Base = BP;
7771           // Emit data for non-overlapped data.
7772           OpenMPOffloadMappingFlags Flags =
7773               OMP_MAP_MEMBER_OF |
7774               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7775                              /*AddPtrFlag=*/false,
7776                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7777           LB = BP;
7778           llvm::Value *Size = nullptr;
7779           // Do bitcopy of all non-overlapped structure elements.
7780           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7781                    Component : OverlappedElements) {
7782             Address ComponentLB = Address::invalid();
7783             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7784                  Component) {
7785               if (MC.getAssociatedDeclaration()) {
7786                 ComponentLB =
7787                     CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7788                         .getAddress(CGF);
7789                 Size = CGF.Builder.CreatePtrDiff(
7790                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7791                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7792                 break;
7793               }
7794             }
7795             assert(Size && "Failed to determine structure size");
7796             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7797             CombinedInfo.BasePointers.push_back(BP.getPointer());
7798             CombinedInfo.Pointers.push_back(LB.getPointer());
7799             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7800                 Size, CGF.Int64Ty, /*isSigned=*/true));
7801             CombinedInfo.Types.push_back(Flags);
7802             CombinedInfo.Mappers.push_back(nullptr);
7803             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7804                                                                       : 1);
7805             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7806           }
7807           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7808           CombinedInfo.BasePointers.push_back(BP.getPointer());
7809           CombinedInfo.Pointers.push_back(LB.getPointer());
7810           Size = CGF.Builder.CreatePtrDiff(
7811               CGF.EmitCastToVoidPtr(
7812                   CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7813               CGF.EmitCastToVoidPtr(LB.getPointer()));
7814           CombinedInfo.Sizes.push_back(
7815               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7816           CombinedInfo.Types.push_back(Flags);
7817           CombinedInfo.Mappers.push_back(nullptr);
7818           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7819                                                                     : 1);
7820           break;
7821         }
7822         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7823         if (!IsMemberPointerOrAddr ||
7824             (Next == CE && MapType != OMPC_MAP_unknown)) {
7825           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7826           CombinedInfo.BasePointers.push_back(BP.getPointer());
7827           CombinedInfo.Pointers.push_back(LB.getPointer());
7828           CombinedInfo.Sizes.push_back(
7829               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7830           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7831                                                                     : 1);
7832 
7833           // If Mapper is valid, the last component inherits the mapper.
7834           bool HasMapper = Mapper && Next == CE;
7835           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7836 
7837           // We need to add a pointer flag for each map that comes from the
7838           // same expression except for the first one. We also need to signal
7839           // this map is the first one that relates with the current capture
7840           // (there is a set of entries for each capture).
7841           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7842               MapType, MapModifiers, MotionModifiers, IsImplicit,
7843               !IsExpressionFirstInfo || RequiresReference ||
7844                   FirstPointerInComplexData,
7845               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
7846 
7847           if (!IsExpressionFirstInfo) {
7848             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7849             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7850             if (IsPointer)
7851               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7852                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
7853 
7854             if (ShouldBeMemberOf) {
7855               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7856               // should be later updated with the correct value of MEMBER_OF.
7857               Flags |= OMP_MAP_MEMBER_OF;
7858               // From now on, all subsequent PTR_AND_OBJ entries should not be
7859               // marked as MEMBER_OF.
7860               ShouldBeMemberOf = false;
7861             }
7862           }
7863 
7864           CombinedInfo.Types.push_back(Flags);
7865         }
7866 
7867         // If we have encountered a member expression so far, keep track of the
7868         // mapped member. If the parent is "*this", then the value declaration
7869         // is nullptr.
7870         if (EncounteredME) {
7871           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7872           unsigned FieldIndex = FD->getFieldIndex();
7873 
7874           // Update info about the lowest and highest elements for this struct
7875           if (!PartialStruct.Base.isValid()) {
7876             PartialStruct.LowestElem = {FieldIndex, LB};
7877             if (IsFinalArraySection) {
7878               Address HB =
7879                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7880                       .getAddress(CGF);
7881               PartialStruct.HighestElem = {FieldIndex, HB};
7882             } else {
7883               PartialStruct.HighestElem = {FieldIndex, LB};
7884             }
7885             PartialStruct.Base = BP;
7886           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7887             PartialStruct.LowestElem = {FieldIndex, LB};
7888           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7889             PartialStruct.HighestElem = {FieldIndex, LB};
7890           }
7891         }
7892 
7893         // Need to emit combined struct for array sections.
7894         if (IsFinalArraySection || IsNonContiguous)
7895           PartialStruct.IsArraySection = true;
7896 
7897         // If we have a final array section, we are done with this expression.
7898         if (IsFinalArraySection)
7899           break;
7900 
7901         // The pointer becomes the base for the next element.
7902         if (Next != CE)
7903           BP = LB;
7904 
7905         IsExpressionFirstInfo = false;
7906         IsCaptureFirstInfo = false;
7907         FirstPointerInComplexData = false;
7908       } else if (FirstPointerInComplexData) {
7909         QualType Ty = Components.rbegin()
7910                           ->getAssociatedDeclaration()
7911                           ->getType()
7912                           .getNonReferenceType();
7913         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7914         FirstPointerInComplexData = false;
7915       }
7916     }
7917 
7918     if (!IsNonContiguous)
7919       return;
7920 
7921     const ASTContext &Context = CGF.getContext();
7922 
7923     // For supporting stride in array section, we need to initialize the first
7924     // dimension size as 1, first offset as 0, and first count as 1
7925     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7926     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7927     MapValuesArrayTy CurStrides;
7928     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7929     uint64_t ElementTypeSize;
7930 
    // Collect Size information for each dimension and get the element size as
    // the first Stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
7934     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7935          Components) {
7936       const Expr *AssocExpr = Component.getAssociatedExpression();
7937       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7938 
7939       if (!OASE)
7940         continue;
7941 
7942       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
7943       auto *CAT = Context.getAsConstantArrayType(Ty);
7944       auto *VAT = Context.getAsVariableArrayType(Ty);
7945 
7946       // We need all the dimension size except for the last dimension.
7947       assert((VAT || CAT || &Component == &*Components.begin()) &&
7948              "Should be either ConstantArray or VariableArray if not the "
7949              "first Component");
7950 
7951       // Get element size if CurStrides is empty.
7952       if (CurStrides.empty()) {
7953         const Type *ElementType = nullptr;
7954         if (CAT)
7955           ElementType = CAT->getElementType().getTypePtr();
7956         else if (VAT)
7957           ElementType = VAT->getElementType().getTypePtr();
7958         else
7959           assert(&Component == &*Components.begin() &&
7960                  "Only expect pointer (non CAT or VAT) when this is the "
7961                  "first Component");
7962         // If ElementType is null, then it means the base is a pointer
7963         // (neither CAT nor VAT) and we'll attempt to get ElementType again
7964         // for next iteration.
7965         if (ElementType) {
7966           // For the case that having pointer as base, we need to remove one
7967           // level of indirection.
7968           if (&Component != &*Components.begin())
7969             ElementType = ElementType->getPointeeOrArrayElementType();
7970           ElementTypeSize =
7971               Context.getTypeSizeInChars(ElementType).getQuantity();
7972           CurStrides.push_back(
7973               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7974         }
7975       }
7976       // Get dimension value except for the last dimension since we don't need
7977       // it.
7978       if (DimSizes.size() < Components.size() - 1) {
7979         if (CAT)
7980           DimSizes.push_back(llvm::ConstantInt::get(
7981               CGF.Int64Ty, CAT->getSize().getZExtValue()));
7982         else if (VAT)
7983           DimSizes.push_back(CGF.Builder.CreateIntCast(
7984               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7985               /*IsSigned=*/false));
7986       }
7987     }
7988 
    // Skip the dummy dimension since we already have its information.
7990     auto DI = DimSizes.begin() + 1;
7991     // Product of dimension.
7992     llvm::Value *DimProd =
7993         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
7994 
7995     // Collect info for non-contiguous. Notice that offset, count, and stride
7996     // are only meaningful for array-section, so we insert a null for anything
7997     // other than array-section.
7998     // Also, the size of offset, count, and stride are not the same as
7999     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8000     // count, and stride are the same as the number of non-contiguous
8001     // declaration in target update to/from clause.
8002     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8003          Components) {
8004       const Expr *AssocExpr = Component.getAssociatedExpression();
8005 
8006       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8007         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8008             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8009             /*isSigned=*/false);
8010         CurOffsets.push_back(Offset);
8011         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8012         CurStrides.push_back(CurStrides.back());
8013         continue;
8014       }
8015 
8016       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8017 
8018       if (!OASE)
8019         continue;
8020 
8021       // Offset
8022       const Expr *OffsetExpr = OASE->getLowerBound();
8023       llvm::Value *Offset = nullptr;
8024       if (!OffsetExpr) {
8025         // If offset is absent, then we just set it to zero.
8026         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8027       } else {
8028         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8029                                            CGF.Int64Ty,
8030                                            /*isSigned=*/false);
8031       }
8032       CurOffsets.push_back(Offset);
8033 
8034       // Count
8035       const Expr *CountExpr = OASE->getLength();
8036       llvm::Value *Count = nullptr;
8037       if (!CountExpr) {
8038         // In Clang, once a high dimension is an array section, we construct all
8039         // the lower dimension as array section, however, for case like
8040         // arr[0:2][2], Clang construct the inner dimension as an array section
8041         // but it actually is not in an array section form according to spec.
8042         if (!OASE->getColonLocFirst().isValid() &&
8043             !OASE->getColonLocSecond().isValid()) {
8044           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8045         } else {
8046           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8047           // When the length is absent it defaults to ⌈(size −
8048           // lower-bound)/stride⌉, where size is the size of the array
8049           // dimension.
8050           const Expr *StrideExpr = OASE->getStride();
8051           llvm::Value *Stride =
8052               StrideExpr
8053                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8054                                               CGF.Int64Ty, /*isSigned=*/false)
8055                   : nullptr;
8056           if (Stride)
8057             Count = CGF.Builder.CreateUDiv(
8058                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8059           else
8060             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8061         }
8062       } else {
8063         Count = CGF.EmitScalarExpr(CountExpr);
8064       }
8065       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8066       CurCounts.push_back(Count);
8067 
8068       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8069       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8070       //              Offset      Count     Stride
8071       //    D0          0           1         4    (int)    <- dummy dimension
8072       //    D1          0           2         8    (2 * (1) * 4)
8073       //    D2          1           2         20   (1 * (1 * 5) * 4)
8074       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8075       const Expr *StrideExpr = OASE->getStride();
8076       llvm::Value *Stride =
8077           StrideExpr
8078               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8079                                           CGF.Int64Ty, /*isSigned=*/false)
8080               : nullptr;
8081       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8082       if (Stride)
8083         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8084       else
8085         CurStrides.push_back(DimProd);
8086       if (DI != DimSizes.end())
8087         ++DI;
8088     }
8089 
8090     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8091     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8092     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8093   }
8094 
8095   /// Return the adjusted map modifiers if the declaration a capture refers to
8096   /// appears in a first-private clause. This is expected to be used only with
8097   /// directives that start with 'target'.
8098   MappableExprsHandler::OpenMPOffloadMappingFlags
8099   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8100     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8101 
8102     // A first private variable captured by reference will use only the
8103     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8104     // declaration is known as first-private in this handler.
8105     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8106       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
8107           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
8108         return MappableExprsHandler::OMP_MAP_ALWAYS |
8109                MappableExprsHandler::OMP_MAP_TO;
8110       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8111         return MappableExprsHandler::OMP_MAP_TO |
8112                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8113       return MappableExprsHandler::OMP_MAP_PRIVATE |
8114              MappableExprsHandler::OMP_MAP_TO;
8115     }
8116     return MappableExprsHandler::OMP_MAP_TO |
8117            MappableExprsHandler::OMP_MAP_FROM;
8118   }
8119 
8120   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8121     // Rotate by getFlagMemberOffset() bits.
8122     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8123                                                   << getFlagMemberOffset());
8124   }
8125 
8126   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8127                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8128     // If the entry is PTR_AND_OBJ but has not been marked with the special
8129     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8130     // marked as MEMBER_OF.
8131     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8132         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8133       return;
8134 
8135     // Reset the placeholder value to prepare the flag for the assignment of the
8136     // proper MEMBER_OF value.
8137     Flags &= ~OMP_MAP_MEMBER_OF;
8138     Flags |= MemberOfFlag;
8139   }
8140 
8141   void getPlainLayout(const CXXRecordDecl *RD,
8142                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8143                       bool AsBase) const {
8144     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8145 
8146     llvm::StructType *St =
8147         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8148 
8149     unsigned NumElements = St->getNumElements();
8150     llvm::SmallVector<
8151         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8152         RecordLayout(NumElements);
8153 
8154     // Fill bases.
8155     for (const auto &I : RD->bases()) {
8156       if (I.isVirtual())
8157         continue;
8158       const auto *Base = I.getType()->getAsCXXRecordDecl();
8159       // Ignore empty bases.
8160       if (Base->isEmpty() || CGF.getContext()
8161                                  .getASTRecordLayout(Base)
8162                                  .getNonVirtualSize()
8163                                  .isZero())
8164         continue;
8165 
8166       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8167       RecordLayout[FieldIndex] = Base;
8168     }
8169     // Fill in virtual bases.
8170     for (const auto &I : RD->vbases()) {
8171       const auto *Base = I.getType()->getAsCXXRecordDecl();
8172       // Ignore empty bases.
8173       if (Base->isEmpty())
8174         continue;
8175       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8176       if (RecordLayout[FieldIndex])
8177         continue;
8178       RecordLayout[FieldIndex] = Base;
8179     }
8180     // Fill in all the fields.
8181     assert(!RD->isUnion() && "Unexpected union.");
8182     for (const auto *Field : RD->fields()) {
8183       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8184       // will fill in later.)
8185       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8186         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8187         RecordLayout[FieldIndex] = Field;
8188       }
8189     }
8190     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8191              &Data : RecordLayout) {
8192       if (Data.isNull())
8193         continue;
8194       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8195         getPlainLayout(Base, Layout, /*AsBase=*/true);
8196       else
8197         Layout.push_back(Data.get<const FieldDecl *>());
8198     }
8199   }
8200 
8201 public:
8202   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8203       : CurDir(&Dir), CGF(CGF) {
8204     // Extract firstprivate clause information.
8205     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8206       for (const auto *D : C->varlists())
8207         FirstPrivateDecls.try_emplace(
8208             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8209     // Extract implicit firstprivates from uses_allocators clauses.
8210     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8211       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8212         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8213         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8214           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8215                                         /*Implicit=*/true);
8216         else if (const auto *VD = dyn_cast<VarDecl>(
8217                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8218                          ->getDecl()))
8219           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8220       }
8221     }
8222     // Extract device pointer clause information.
8223     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8224       for (auto L : C->component_lists())
8225         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8226   }
8227 
8228   /// Constructor for the declare mapper directive.
8229   MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8230       : CurDir(&Dir), CGF(CGF) {}
8231 
8232   /// Generate code for the combined entry if we have a partially mapped struct
8233   /// and take care of the mapping flags of the arguments corresponding to
8234   /// individual struct members.
8235   void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8236                          MapFlagsArrayTy &CurTypes,
8237                          const StructRangeInfoTy &PartialStruct,
8238                          const ValueDecl *VD = nullptr,
8239                          bool NotTargetParams = false) const {
8240     if (CurTypes.size() == 1 &&
8241         ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
8242         !PartialStruct.IsArraySection)
8243       return;
8244     CombinedInfo.Exprs.push_back(VD);
8245     // Base is the base of the struct
8246     CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8247     // Pointer is the address of the lowest element
8248     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
8249     CombinedInfo.Pointers.push_back(LB);
8250     // There should not be a mapper for a combined entry.
8251     CombinedInfo.Mappers.push_back(nullptr);
8252     // Size is (addr of {highest+1} element) - (addr of lowest element)
8253     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
8254     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
8255     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8256     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8257     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
8258     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8259                                                   /*isSigned=*/false);
8260     CombinedInfo.Sizes.push_back(Size);
8261     // Map type is always TARGET_PARAM, if generate info for captures.
8262     CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
8263                                                  : OMP_MAP_TARGET_PARAM);
8264     // If any element has the present modifier, then make sure the runtime
8265     // doesn't attempt to allocate the struct.
8266     if (CurTypes.end() !=
8267         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8268           return Type & OMP_MAP_PRESENT;
8269         }))
8270       CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
8271     // Remove TARGET_PARAM flag from the first element if any.
8272     if (!CurTypes.empty())
8273       CurTypes.front() &= ~OMP_MAP_TARGET_PARAM;
8274 
8275     // All other current entries will be MEMBER_OF the combined entry
8276     // (except for PTR_AND_OBJ entries which do not have a placeholder value
8277     // 0xFFFF in the MEMBER_OF field).
8278     OpenMPOffloadMappingFlags MemberOfFlag =
8279         getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8280     for (auto &M : CurTypes)
8281       setCorrectMemberOfFlag(M, MemberOfFlag);
8282   }
8283 
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  ///
  /// The current directive must be an executable directive. Its map, to, from,
  /// use_device_ptr and use_device_addr clauses are processed.
  ///
  /// \param CombinedInfo Receives the generated entries.
  /// \param NotTargetParams When true, OMP_MAP_NONE is used where
  /// OMP_MAP_TARGET_PARAM would otherwise be set, and no component list is
  /// treated as the first one of its declaration.
  /// \param SkipVarSet Declarations for which no component list is recorded
  /// by the InfoGen helper below.
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo, bool NotTargetParams = false,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses. The key is the canonical declaration of D (or null when D is
    // null); declarations found in SkipVarSet are ignored.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          const ValueDecl *VD =
              D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
          if (SkipVarSet.count(VD))
            return;
          Info[VD].emplace_back(L, MapType, MapModifiers, MotionModifiers,
                                ReturnDevicePointer, IsImplicit, Mapper, VarRef,
                                ForDeviceAddr);
        };

    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Collect the component lists of the map clauses. EI walks the clause's
    // variable references in step with its component lists.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    // Collect the component lists of the 'to' clauses; they are recorded with
    // map type 'to' and carry motion modifiers instead of map modifiers.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_to, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    // Collect the component lists of the 'from' clauses; they are recorded
    // with map type 'from'.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_from, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    // Entries created here for use_device_ptr items are kept apart and
    // appended to CombinedInfo only at the very end of this function.
    MapCombinedInfoTy UseDevicePtrCombinedInfo;

    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          // Exclude cases where the base pointer is mapped as array subscript,
          // array section or array shaping. The base address is passed as a
          // pointer to base in this case and cannot be used as a base for
          // use_device_ptr list item.
          if (CI != It->second.end()) {
            // PrevCI is the component that follows the base declaration in
            // the matched list; it equals rend() when the list has a single
            // component.
            auto PrevCI = std::next(CI->Components.rbegin());
            const auto *VarD = dyn_cast<VarDecl>(VD);
            // The existing entry is reused when any of the following holds:
            // unified shared memory is required, the list item is a member
            // expression, VD is not of pointer type, the matched list has only
            // one component, that next component is a member expression, VD is
            // not a VarDecl, or VD has local storage.
            if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                isa<MemberExpr>(IE) ||
                !VD->getType().getNonReferenceType()->isPointerType() ||
                PrevCI == CI->Components.rend() ||
                isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                VarD->hasLocalStorage()) {
              CI->ReturnDevicePointer = true;
              continue;
            }
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Components, OMPC_MAP_unknown, llvm::None, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit(), nullptr);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
        } else {
          // Zero-size RETURN_PARAM entry whose base pointer and pointer are
          // both the value loaded from the list item.
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          UseDevicePtrCombinedInfo.Exprs.push_back(VD);
          UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
          UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
          UseDevicePtrCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDevicePtrCombinedInfo.Types.push_back(
              OMP_MAP_RETURN_PARAM |
              (NotTargetParams ? OMP_MAP_NONE : OMP_MAP_TARGET_PARAM));
          UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Look at the use_device_addr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_addr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
    // Processed ensures each declaration is handled only once across all
    // use_device_addr clauses.
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) {
      for (const auto L : C->component_lists()) {
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr, nullptr, /*ForDeviceAddr=*/true);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
        } else {
          // Use the address of the list item when it is a glvalue, otherwise
          // its scalar value. Unlike use_device_ptr, this entry is appended
          // to CombinedInfo right away.
          llvm::Value *Ptr;
          if (IE->isGLValue())
            Ptr = CGF.EmitLValue(IE).getPointer(CGF);
          else
            Ptr = CGF.EmitScalarExpr(IE);
          CombinedInfo.Exprs.push_back(VD);
          CombinedInfo.BasePointers.emplace_back(Ptr, VD);
          CombinedInfo.Pointers.push_back(Ptr);
          CombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          CombinedInfo.Types.push_back(
              OMP_MAP_RETURN_PARAM |
              (NotTargetParams ? OMP_MAP_NONE : OMP_MAP_TARGET_PARAM));
          CombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Generate the entries declaration by declaration, in the order in which
    // the declarations were first inserted into Info.
    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = !NotTargetParams;

      // Underlying variable declaration used in the map clause.
      const ValueDecl *VD = std::get<0>(M);

      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
        CurInfo.NonContigInfo.IsNonContiguous =
            L.Components.back().isNonContiguous();
        generateInfoForComponentList(
            L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, CurInfo,
            PartialStruct, IsFirstComponentList, L.IsImplicit, L.Mapper,
            L.ForDeviceAddr, VD, L.VarRef);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (L.ReturnDevicePointer) {
          assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          // The flag goes on the first entry generated for this list.
          CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
              RelevantVD);
          CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(M.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            // use_device_addr: base pointer and pointer are the same value.
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
          } else {
            // use_device_ptr: the base pointer is the address of the member,
            // the pointer is the value loaded from it.
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
            // value MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                                    OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry. It is
      // appended to CombinedInfo before the member entries held in CurInfo.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD,
                          NotTargetParams);

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDevicePtrCombinedInfo);
  }
8595 
8596   /// Generate all the base pointers, section pointers, sizes, map types, and
8597   /// mappers for the extracted map clauses of user-defined mapper (all included
8598   /// in \a CombinedInfo).
8599   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
8600     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8601            "Expect a declare mapper directive");
8602     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8603     // We have to process the component lists that relate with the same
8604     // declaration in a single chunk so that we can generate the map flags
8605     // correctly. Therefore, we organize all lists in a map.
8606     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8607 
8608     // Fill the information map for map clauses.
8609     for (const auto *C : CurMapperDir->clauselists()) {
8610       const auto *MC = cast<OMPMapClause>(C);
8611       const auto *EI = MC->getVarRefs().begin();
8612       for (const auto L : MC->component_lists()) {
8613         // The Expression is not correct if the mapping is implicit
8614         const Expr *E = (MC->getMapLoc().isValid()) ? *EI : nullptr;
8615         const ValueDecl *VD =
8616             std::get<0>(L) ? cast<ValueDecl>(std::get<0>(L)->getCanonicalDecl())
8617                            : nullptr;
8618         // Get the corresponding user-defined mapper.
8619         Info[VD].emplace_back(std::get<1>(L), MC->getMapType(),
8620                               MC->getMapTypeModifiers(), llvm::None,
8621                               /*ReturnDevicePointer=*/false, MC->isImplicit(),
8622                               std::get<2>(L), E);
8623         ++EI;
8624       }
8625     }
8626 
8627     for (const auto &M : Info) {
8628       // We need to know when we generate information for the first component
8629       // associated with a capture, because the mapping flags depend on it.
8630       bool IsFirstComponentList = true;
8631 
8632       // Underlying variable declaration used in the map clause.
8633       const ValueDecl *VD = std::get<0>(M);
8634 
8635       // Temporary generated information.
8636       MapCombinedInfoTy CurInfo;
8637       StructRangeInfoTy PartialStruct;
8638 
8639       for (const MapInfo &L : M.second) {
8640         assert(!L.Components.empty() &&
8641                "Not expecting declaration with no component lists.");
8642         generateInfoForComponentList(
8643             L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, CurInfo,
8644             PartialStruct, IsFirstComponentList, L.IsImplicit, L.Mapper,
8645             L.ForDeviceAddr, VD, L.VarRef);
8646         IsFirstComponentList = false;
8647       }
8648 
8649       // If there is an entry in PartialStruct it means we have a struct with
8650       // individual members mapped. Emit an extra combined entry.
8651       if (PartialStruct.Base.isValid()) {
8652         CurInfo.NonContigInfo.Dims.push_back(0);
8653         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8654       }
8655 
8656       // We need to append the results of this capture to what we already have.
8657       CombinedInfo.append(CurInfo);
8658     }
8659   }
8660 
  /// Emit map info for the captures of a lambda: the captured 'this' pointer,
  /// variables captured by reference, and pointer variables captured by copy.
8662   void generateInfoForLambdaCaptures(
8663       const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8664       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8665     const auto *RD = VD->getType()
8666                          .getCanonicalType()
8667                          .getNonReferenceType()
8668                          ->getAsCXXRecordDecl();
8669     if (!RD || !RD->isLambda())
8670       return;
8671     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8672     LValue VDLVal = CGF.MakeAddrLValue(
8673         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8674     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8675     FieldDecl *ThisCapture = nullptr;
8676     RD->getCaptureFields(Captures, ThisCapture);
8677     if (ThisCapture) {
8678       LValue ThisLVal =
8679           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8680       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8681       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8682                                  VDLVal.getPointer(CGF));
8683       CombinedInfo.Exprs.push_back(VD);
8684       CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8685       CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8686       CombinedInfo.Sizes.push_back(
8687           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8688                                     CGF.Int64Ty, /*isSigned=*/true));
8689       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8690                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8691       CombinedInfo.Mappers.push_back(nullptr);
8692     }
8693     for (const LambdaCapture &LC : RD->captures()) {
8694       if (!LC.capturesVariable())
8695         continue;
8696       const VarDecl *VD = LC.getCapturedVar();
8697       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8698         continue;
8699       auto It = Captures.find(VD);
8700       assert(It != Captures.end() && "Found lambda capture without field.");
8701       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8702       if (LC.getCaptureKind() == LCK_ByRef) {
8703         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8704         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8705                                    VDLVal.getPointer(CGF));
8706         CombinedInfo.Exprs.push_back(VD);
8707         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8708         CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8709         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8710             CGF.getTypeSize(
8711                 VD->getType().getCanonicalType().getNonReferenceType()),
8712             CGF.Int64Ty, /*isSigned=*/true));
8713       } else {
8714         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8715         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8716                                    VDLVal.getPointer(CGF));
8717         CombinedInfo.Exprs.push_back(VD);
8718         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8719         CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8720         CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8721       }
8722       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8723                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8724       CombinedInfo.Mappers.push_back(nullptr);
8725     }
8726   }
8727 
  /// Set the correct MEMBER_OF index in the map flags of implicit lambda
  /// captures.
8729   void adjustMemberOfForLambdaCaptures(
8730       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8731       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8732       MapFlagsArrayTy &Types) const {
8733     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8734       // Set correct member_of idx for all implicit lambda captures.
8735       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8736                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8737         continue;
8738       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8739       assert(BasePtr && "Unable to find base lambda address.");
8740       int TgtIdx = -1;
8741       for (unsigned J = I; J > 0; --J) {
8742         unsigned Idx = J - 1;
8743         if (Pointers[Idx] != BasePtr)
8744           continue;
8745         TgtIdx = Idx;
8746         break;
8747       }
8748       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8749       // All other current entries will be MEMBER_OF the combined entry
8750       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8751       // 0xFFFF in the MEMBER_OF field).
8752       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8753       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8754     }
8755   }
8756 
8757   /// Generate the base pointers, section pointers, sizes, map types, and
8758   /// mappers associated to a given capture (all included in \a CombinedInfo).
  ///
  /// \a Cap is the capture to process (variable array type captures are not
  /// accepted). \a Arg is only used when the captured declaration appears in
  /// an is_device_ptr clause, in which case it is passed as both the base
  /// pointer and the pointer. \a PartialStruct is forwarded to
  /// generateInfoForComponentList for every component list found for the
  /// capture. If no map clause of the current directive mentions the captured
  /// declaration, nothing is appended to \a CombinedInfo.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // Canonical declaration of the captured variable, or null when the capture
    // is 'this'.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    // One record per component list found for VD, holding in order: the
    // component list, the map type, the map type modifiers, the implicit flag,
    // the user-defined mapper declaration and the original map expression.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Collect the component lists that the map clauses of the current
    // directive associate with VD.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      // NOTE(review): EI starts at the clause's first variable reference and
      // advances once per list yielded for VD. It therefore matches the list
      // only if decl_component_lists(VD) walks in step with getVarRefs() -
      // confirm for clauses that also map other declarations.
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }

    // Find overlapping elements (including the offset from the base element).
    // The map is keyed by the address of the base record inside
    // DeclComponentLists; the value holds the component lists that overlap it.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    // Index of the first record after L, so that every unordered pair of
    // records is compared exactly once.
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        // Only Components1 is read from L1 below; the other tied locals are
        // overwritten here but not used inside this loop.
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        // Walk both lists in reverse, in lockstep, for as long as the
        // components agree on statement class and associated declaration.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          // The list that was exhausted is the base; the other list is
          // recorded as overlapping it.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    // Layout is the field order consulted by the comparator below when two
    // fields do not share a parent.
    // NOTE(review): VD is dereferenced here although it is null for 'this'
    // captures, and RD is dereferenced without a null check - confirm that
    // OverlappedData is empty for 'this' and that VD's type is a record.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            // Skip the common part of both lists, using the same reverse
            // lockstep walk as the overlap detection above.
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // The lists diverge at a pair of fields (the casts assert that).
            // Fields of the same parent are ordered by field index; otherwise
            // the field that appears first in Layout is the smaller one.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // We need to know when we generate information for the first component
    // list associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements. Each of
    // them is passed as the first component list.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    // A list can only count as the first one if no overlapped base was emitted
    // above; the flag is cleared after the first record is visited, whether or
    // not that record was emitted here.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      // Records that served as an overlap base were already emitted above.
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }
8952 
8953   /// Generate the default map information for a given capture \a CI,
8954   /// record field declaration \a RI and captured value \a CV.
8955   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8956                               const FieldDecl &RI, llvm::Value *CV,
8957                               MapCombinedInfoTy &CombinedInfo) const {
8958     bool IsImplicit = true;
8959     // Do the default mapping.
8960     if (CI.capturesThis()) {
8961       CombinedInfo.Exprs.push_back(nullptr);
8962       CombinedInfo.BasePointers.push_back(CV);
8963       CombinedInfo.Pointers.push_back(CV);
8964       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8965       CombinedInfo.Sizes.push_back(
8966           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8967                                     CGF.Int64Ty, /*isSigned=*/true));
8968       // Default map type.
8969       CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
8970     } else if (CI.capturesVariableByCopy()) {
8971       const VarDecl *VD = CI.getCapturedVar();
8972       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8973       CombinedInfo.BasePointers.push_back(CV);
8974       CombinedInfo.Pointers.push_back(CV);
8975       if (!RI.getType()->isAnyPointerType()) {
8976         // We have to signal to the runtime captures passed by value that are
8977         // not pointers.
8978         CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
8979         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8980             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8981       } else {
8982         // Pointers are implicitly mapped with a zero size and no flags
8983         // (other than first map that is added for all implicit maps).
8984         CombinedInfo.Types.push_back(OMP_MAP_NONE);
8985         CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8986       }
8987       auto I = FirstPrivateDecls.find(VD);
8988       if (I != FirstPrivateDecls.end())
8989         IsImplicit = I->getSecond();
8990     } else {
8991       assert(CI.capturesVariable() && "Expected captured reference.");
8992       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8993       QualType ElementType = PtrTy->getPointeeType();
8994       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8995           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8996       // The default map type for a scalar/complex type is 'to' because by
8997       // default the value doesn't have to be retrieved. For an aggregate
8998       // type, the default is 'tofrom'.
8999       CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
9000       const VarDecl *VD = CI.getCapturedVar();
9001       auto I = FirstPrivateDecls.find(VD);
9002       if (I != FirstPrivateDecls.end() &&
9003           VD->getType().isConstant(CGF.getContext())) {
9004         llvm::Constant *Addr =
9005             CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
9006         // Copy the value of the original variable to the new global copy.
9007         CGF.Builder.CreateMemCpy(
9008             CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
9009             Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
9010             CombinedInfo.Sizes.back(), /*IsVolatile=*/false);
9011         // Use new global variable as the base pointers.
9012         CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9013         CombinedInfo.BasePointers.push_back(Addr);
9014         CombinedInfo.Pointers.push_back(Addr);
9015       } else {
9016         CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9017         CombinedInfo.BasePointers.push_back(CV);
9018         if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9019           Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9020               CV, ElementType, CGF.getContext().getDeclAlign(VD),
9021               AlignmentSource::Decl));
9022           CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
9023         } else {
9024           CombinedInfo.Pointers.push_back(CV);
9025         }
9026       }
9027       if (I != FirstPrivateDecls.end())
9028         IsImplicit = I->getSecond();
9029     }
9030     // Every default map produces a single argument which is a target parameter.
9031     CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
9032 
9033     // Add flag stating this is an implicit map.
9034     if (IsImplicit)
9035       CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
9036 
9037     // No user-defined mapper for default mapping.
9038     CombinedInfo.Mappers.push_back(nullptr);
9039   }
9040 };
9041 } // anonymous namespace
9042 
9043 static void emitNonContiguousDescriptor(
9044     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9045     CGOpenMPRuntime::TargetDataInfo &Info) {
9046   CodeGenModule &CGM = CGF.CGM;
9047   MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
9048       &NonContigInfo = CombinedInfo.NonContigInfo;
9049 
9050   // Build an array of struct descriptor_dim and then assign it to
9051   // offload_args.
9052   //
9053   // struct descriptor_dim {
9054   //  uint64_t offset;
9055   //  uint64_t count;
9056   //  uint64_t stride
9057   // };
9058   ASTContext &C = CGF.getContext();
9059   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9060   RecordDecl *RD;
9061   RD = C.buildImplicitRecord("descriptor_dim");
9062   RD->startDefinition();
9063   addFieldToRecordDecl(C, RD, Int64Ty);
9064   addFieldToRecordDecl(C, RD, Int64Ty);
9065   addFieldToRecordDecl(C, RD, Int64Ty);
9066   RD->completeDefinition();
9067   QualType DimTy = C.getRecordType(RD);
9068 
9069   enum { OffsetFD = 0, CountFD, StrideFD };
9070   // We need two index variable here since the size of "Dims" is the same as the
9071   // size of Components, however, the size of offset, count, and stride is equal
9072   // to the size of base declaration that is non-contiguous.
9073   for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9074     // Skip emitting ir if dimension size is 1 since it cannot be
9075     // non-contiguous.
9076     if (NonContigInfo.Dims[I] == 1)
9077       continue;
9078     llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
9079     QualType ArrayTy =
9080         C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
9081     Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
9082     for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
9083       unsigned RevIdx = EE - II - 1;
9084       LValue DimsLVal = CGF.MakeAddrLValue(
9085           CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
9086       // Offset
9087       LValue OffsetLVal = CGF.EmitLValueForField(
9088           DimsLVal, *std::next(RD->field_begin(), OffsetFD));
9089       CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
9090       // Count
9091       LValue CountLVal = CGF.EmitLValueForField(
9092           DimsLVal, *std::next(RD->field_begin(), CountFD));
9093       CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
9094       // Stride
9095       LValue StrideLVal = CGF.EmitLValueForField(
9096           DimsLVal, *std::next(RD->field_begin(), StrideFD));
9097       CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
9098     }
9099     // args[I] = &dims
9100     Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9101         DimsAddr, CGM.Int8PtrTy);
9102     llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9103         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9104         Info.PointersArray, 0, I);
9105     Address PAddr(P, CGF.getPointerAlign());
9106     CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
9107     ++L;
9108   }
9109 }
9110 
9111 /// Emit a string constant containing the names of the values mapped to the
9112 /// offloading runtime library.
9113 llvm::Constant *
9114 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9115                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9116   llvm::Constant *SrcLocStr;
9117   if (!MapExprs.getMapDecl()) {
9118     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
9119   } else {
9120     std::string ExprName = "";
9121     if (MapExprs.getMapExpr()) {
9122       PrintingPolicy P(CGF.getContext().getLangOpts());
9123       llvm::raw_string_ostream OS(ExprName);
9124       MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9125       OS.flush();
9126     } else {
9127       ExprName = MapExprs.getMapDecl()->getNameAsString();
9128     }
9129 
9130     SourceLocation Loc = MapExprs.getMapDecl()->getLocation();
9131     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9132     const char *FileName = PLoc.getFilename();
9133     unsigned Line = PLoc.getLine();
9134     unsigned Column = PLoc.getColumn();
9135     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(),
9136                                                 Line, Column);
9137   }
9138 
9139   return SrcLocStr;
9140 }
9141 
9142 /// Emit the arrays used to pass the captures and map information to the
9143 /// offloading runtime library. If there is no map or capture information,
9144 /// return nullptr by reference.
9145 static void emitOffloadingArrays(
9146     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9147     CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
9148     bool IsNonContiguous = false) {
9149   CodeGenModule &CGM = CGF.CGM;
9150   ASTContext &Ctx = CGF.getContext();
9151 
9152   // Reset the array information.
9153   Info.clearArrayInfo();
9154   Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
9155 
9156   if (Info.NumberOfPtrs) {
9157     // Detect if we have any capture size requiring runtime evaluation of the
9158     // size so that a constant array could be eventually used.
9159     bool hasRuntimeEvaluationCaptureSize = false;
9160     for (llvm::Value *S : CombinedInfo.Sizes)
9161       if (!isa<llvm::Constant>(S)) {
9162         hasRuntimeEvaluationCaptureSize = true;
9163         break;
9164       }
9165 
9166     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
9167     QualType PointerArrayType = Ctx.getConstantArrayType(
9168         Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
9169         /*IndexTypeQuals=*/0);
9170 
9171     Info.BasePointersArray =
9172         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
9173     Info.PointersArray =
9174         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
9175     Address MappersArray =
9176         CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
9177     Info.MappersArray = MappersArray.getPointer();
9178 
9179     // If we don't have any VLA types or other types that require runtime
9180     // evaluation, we can use a constant array for the map sizes, otherwise we
9181     // need to fill up the arrays as we do for the pointers.
9182     QualType Int64Ty =
9183         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
9184     if (hasRuntimeEvaluationCaptureSize) {
9185       QualType SizeArrayType = Ctx.getConstantArrayType(
9186           Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
9187           /*IndexTypeQuals=*/0);
9188       Info.SizesArray =
9189           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
9190     } else {
9191       // We expect all the sizes to be constant, so we collect them to create
9192       // a constant array.
9193       SmallVector<llvm::Constant *, 16> ConstSizes;
9194       for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
9195         if (IsNonContiguous &&
9196             (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
9197           ConstSizes.push_back(llvm::ConstantInt::get(
9198               CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
9199         } else {
9200           ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
9201         }
9202       }
9203 
9204       auto *SizesArrayInit = llvm::ConstantArray::get(
9205           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
9206       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
9207       auto *SizesArrayGbl = new llvm::GlobalVariable(
9208           CGM.getModule(), SizesArrayInit->getType(),
9209           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9210           SizesArrayInit, Name);
9211       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
9212       Info.SizesArray = SizesArrayGbl;
9213     }
9214 
9215     // The map types are always constant so we don't need to generate code to
9216     // fill arrays. Instead, we create an array constant.
9217     SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
9218     llvm::copy(CombinedInfo.Types, Mapping.begin());
9219     llvm::Constant *MapTypesArrayInit =
9220         llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
9221     std::string MaptypesName =
9222         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
9223     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
9224         CGM.getModule(), MapTypesArrayInit->getType(),
9225         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9226         MapTypesArrayInit, MaptypesName);
9227     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
9228     Info.MapTypesArray = MapTypesArrayGbl;
9229 
9230     // The information types are only built if there is debug information
9231     // requested.
9232     if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
9233       Info.MapNamesArray = llvm::Constant::getNullValue(
9234           llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
9235     } else {
9236       auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9237         return emitMappingInformation(CGF, OMPBuilder, MapExpr);
9238       };
9239       SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
9240       llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
9241 
9242       llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get(
9243           llvm::ArrayType::get(
9244               llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo(),
9245               CombinedInfo.Exprs.size()),
9246           InfoMap);
9247       auto *MapNamesArrayGbl = new llvm::GlobalVariable(
9248           CGM.getModule(), MapNamesArrayInit->getType(),
9249           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9250           MapNamesArrayInit,
9251           CGM.getOpenMPRuntime().getName({"offload_mapnames"}));
9252       Info.MapNamesArray = MapNamesArrayGbl;
9253     }
9254 
9255     // If there's a present map type modifier, it must not be applied to the end
9256     // of a region, so generate a separate map type array in that case.
9257     if (Info.separateBeginEndCalls()) {
9258       bool EndMapTypesDiffer = false;
9259       for (uint64_t &Type : Mapping) {
9260         if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
9261           Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
9262           EndMapTypesDiffer = true;
9263         }
9264       }
9265       if (EndMapTypesDiffer) {
9266         MapTypesArrayInit =
9267             llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
9268         MaptypesName = CGM.getOpenMPRuntime().getName({"offload_maptypes"});
9269         MapTypesArrayGbl = new llvm::GlobalVariable(
9270             CGM.getModule(), MapTypesArrayInit->getType(),
9271             /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9272             MapTypesArrayInit, MaptypesName);
9273         MapTypesArrayGbl->setUnnamedAddr(
9274             llvm::GlobalValue::UnnamedAddr::Global);
9275         Info.MapTypesArrayEnd = MapTypesArrayGbl;
9276       }
9277     }
9278 
9279     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
9280       llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
9281       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
9282           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9283           Info.BasePointersArray, 0, I);
9284       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9285           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
9286       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9287       CGF.Builder.CreateStore(BPVal, BPAddr);
9288 
9289       if (Info.requiresDevicePointerInfo())
9290         if (const ValueDecl *DevVD =
9291                 CombinedInfo.BasePointers[I].getDevicePtrDecl())
9292           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
9293 
9294       llvm::Value *PVal = CombinedInfo.Pointers[I];
9295       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9296           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9297           Info.PointersArray, 0, I);
9298       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9299           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
9300       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9301       CGF.Builder.CreateStore(PVal, PAddr);
9302 
9303       if (hasRuntimeEvaluationCaptureSize) {
9304         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
9305             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9306             Info.SizesArray,
9307             /*Idx0=*/0,
9308             /*Idx1=*/I);
9309         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
9310         CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
9311                                                           CGM.Int64Ty,
9312                                                           /*isSigned=*/true),
9313                                 SAddr);
9314       }
9315 
9316       // Fill up the mapper array.
9317       llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
9318       if (CombinedInfo.Mappers[I]) {
9319         MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
9320             cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9321         MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
9322         Info.HasMapper = true;
9323       }
9324       Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
9325       CGF.Builder.CreateStore(MFunc, MAddr);
9326     }
9327   }
9328 
9329   if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
9330       Info.NumberOfPtrs == 0)
9331     return;
9332 
9333   emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
9334 }
9335 
namespace {
/// Optional settings consumed by emitOffloadingArraysArgument.
struct ArgumentsOptions {
  /// True when the arguments are being built for the call that ends a region
  /// rather than the one that begins it. Defaults to false.
  bool ForEndCall = false;

  ArgumentsOptions() = default;

  /// Not marked 'explicit', so a bool converts implicitly to ArgumentsOptions.
  ArgumentsOptions(bool IsForEndCall) : ForEndCall(IsForEndCall) {}
};
} // namespace
9344 
9345 /// Emit the arguments to be passed to the runtime library based on the
9346 /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
9347 /// ForEndCall, emit map types to be passed for the end of the region instead of
9348 /// the beginning.
9349 static void emitOffloadingArraysArgument(
9350     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
9351     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
9352     llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
9353     llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
9354     const ArgumentsOptions &Options = ArgumentsOptions()) {
9355   assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
9356          "expected region end call to runtime only when end call is separate");
9357   CodeGenModule &CGM = CGF.CGM;
9358   if (Info.NumberOfPtrs) {
9359     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9360         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9361         Info.BasePointersArray,
9362         /*Idx0=*/0, /*Idx1=*/0);
9363     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9364         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9365         Info.PointersArray,
9366         /*Idx0=*/0,
9367         /*Idx1=*/0);
9368     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9369         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9370         /*Idx0=*/0, /*Idx1=*/0);
9371     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9372         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9373         Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
9374                                                     : Info.MapTypesArray,
9375         /*Idx0=*/0,
9376         /*Idx1=*/0);
9377 
9378     // Only emit the mapper information arrays if debug information is
9379     // requested.
9380     if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9381       MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9382     else
9383       MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9384           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9385           Info.MapNamesArray,
9386           /*Idx0=*/0,
9387           /*Idx1=*/0);
9388     // If there is no user-defined mapper, set the mapper array to nullptr to
9389     // avoid an unnecessary data privatization
9390     if (!Info.HasMapper)
9391       MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9392     else
9393       MappersArrayArg =
9394           CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
9395   } else {
9396     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9397     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9398     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9399     MapTypesArrayArg =
9400         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9401     MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9402     MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9403   }
9404 }
9405 
9406 /// Check for inner distribute directive.
9407 static const OMPExecutableDirective *
9408 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9409   const auto *CS = D.getInnermostCapturedStmt();
9410   const auto *Body =
9411       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9412   const Stmt *ChildStmt =
9413       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9414 
9415   if (const auto *NestedDir =
9416           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9417     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9418     switch (D.getDirectiveKind()) {
9419     case OMPD_target:
9420       if (isOpenMPDistributeDirective(DKind))
9421         return NestedDir;
9422       if (DKind == OMPD_teams) {
9423         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9424             /*IgnoreCaptured=*/true);
9425         if (!Body)
9426           return nullptr;
9427         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9428         if (const auto *NND =
9429                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9430           DKind = NND->getDirectiveKind();
9431           if (isOpenMPDistributeDirective(DKind))
9432             return NND;
9433         }
9434       }
9435       return nullptr;
9436     case OMPD_target_teams:
9437       if (isOpenMPDistributeDirective(DKind))
9438         return NestedDir;
9439       return nullptr;
9440     case OMPD_target_parallel:
9441     case OMPD_target_simd:
9442     case OMPD_target_parallel_for:
9443     case OMPD_target_parallel_for_simd:
9444       return nullptr;
9445     case OMPD_target_teams_distribute:
9446     case OMPD_target_teams_distribute_simd:
9447     case OMPD_target_teams_distribute_parallel_for:
9448     case OMPD_target_teams_distribute_parallel_for_simd:
9449     case OMPD_parallel:
9450     case OMPD_for:
9451     case OMPD_parallel_for:
9452     case OMPD_parallel_master:
9453     case OMPD_parallel_sections:
9454     case OMPD_for_simd:
9455     case OMPD_parallel_for_simd:
9456     case OMPD_cancel:
9457     case OMPD_cancellation_point:
9458     case OMPD_ordered:
9459     case OMPD_threadprivate:
9460     case OMPD_allocate:
9461     case OMPD_task:
9462     case OMPD_simd:
9463     case OMPD_sections:
9464     case OMPD_section:
9465     case OMPD_single:
9466     case OMPD_master:
9467     case OMPD_critical:
9468     case OMPD_taskyield:
9469     case OMPD_barrier:
9470     case OMPD_taskwait:
9471     case OMPD_taskgroup:
9472     case OMPD_atomic:
9473     case OMPD_flush:
9474     case OMPD_depobj:
9475     case OMPD_scan:
9476     case OMPD_teams:
9477     case OMPD_target_data:
9478     case OMPD_target_exit_data:
9479     case OMPD_target_enter_data:
9480     case OMPD_distribute:
9481     case OMPD_distribute_simd:
9482     case OMPD_distribute_parallel_for:
9483     case OMPD_distribute_parallel_for_simd:
9484     case OMPD_teams_distribute:
9485     case OMPD_teams_distribute_simd:
9486     case OMPD_teams_distribute_parallel_for:
9487     case OMPD_teams_distribute_parallel_for_simd:
9488     case OMPD_target_update:
9489     case OMPD_declare_simd:
9490     case OMPD_declare_variant:
9491     case OMPD_begin_declare_variant:
9492     case OMPD_end_declare_variant:
9493     case OMPD_declare_target:
9494     case OMPD_end_declare_target:
9495     case OMPD_declare_reduction:
9496     case OMPD_declare_mapper:
9497     case OMPD_taskloop:
9498     case OMPD_taskloop_simd:
9499     case OMPD_master_taskloop:
9500     case OMPD_master_taskloop_simd:
9501     case OMPD_parallel_master_taskloop:
9502     case OMPD_parallel_master_taskloop_simd:
9503     case OMPD_requires:
9504     case OMPD_unknown:
9505     default:
9506       llvm_unreachable("Unexpected directive.");
9507     }
9508   }
9509 
9510   return nullptr;
9511 }
9512 
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type) {
///   // Allocate space for an array section first.
///   if (size > 1 && !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFrom(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFrom(type));
/// }
/// \endcode
///
/// \param D   The 'declare mapper' declaration to emit a function for. If
///            UDMMap already has an entry for it, nothing is emitted.
/// \param CGF If non-null, \p D is additionally recorded in FunctionUDMMap
///            under CGF->CurFn once the mapper has been emitted.
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // A mapper function is emitted at most once per declaration.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  // The variable named in the 'declare mapper' directive; it is privatized
  // below so that it refers to the element currently being mapped.
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes.
  // Parameter order: handle, base, begin, size, type.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The function name is built from "omp_mapper", the mangled name of the
  // mapped type, and the mapper's own name.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Drop 'optnone' from the generated function if it was applied.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  // NOTE(review): the QualType handed to the EmitLoadOfScalar calls in this
  // function is a pointer type wrapped around the parameter's type -- confirm
  // that this is what is intended.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // NOTE(review): PtrBegin is formed from the address of the BeginArg local
  // (cast to pointer-to-PtrTy), not from the loaded begin value; the loop
  // below steps over PtrTy-sized slots from there and loads the element
  // pointer out of each slot. Confirm this is intended when Size > 1.
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
      CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  // HeadBB is passed as the helper's exit block, so control reaches the loop
  // header whichever path the initialization code takes.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  // An empty range (PtrBegin == PtrEnd) skips straight to the exit block.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Remember the block that branches into the loop; it is the first incoming
  // edge of the loop PHI below.
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  // LastBB tracks the block from which the loop back edge is taken. It starts
  // as BodyBB and is updated for every component emitted below.
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
    return MapperCGF
        .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
        .getAddress(MapperCGF);
  });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the count into the position of the MEMBER_OF field so that it can be
  // added directly to a map type further down.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    // Base and begin are passed to the callee as void pointers.
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];

    // Extract the MEMBER_OF field from the map type.
    llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
    MapperCGF.EmitBlock(MemberBB);
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *Member = MapperCGF.Builder.CreateAnd(
        OriMapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
    llvm::BasicBlock *MemberCombineBB =
        MapperCGF.createBasicBlock("omp.member.combine");
    llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
    // Despite its name, IsMember is true when the MEMBER_OF field is zero; in
    // that case the combine block is skipped.
    llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
    MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
    // Add the number of pre-existing components to the MEMBER_OF field if it
    // is valid.
    MapperCGF.EmitBlock(MemberCombineBB);
    llvm::Value *CombinedMember =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
    // Do nothing if it is not a member of previous components.
    MapperCGF.EmitBlock(TypeBB);
    // Merge the untouched map type (from MemberBB) with the adjusted one (from
    // MemberCombineBB).
    llvm::PHINode *MemberMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
    MemberMapType->addIncoming(OriMapType, MemberBB);
    MemberMapType->addIncoming(CombinedMember, MemberCombineBB);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    // LeftToFrom holds only the TO/FROM bits of the incoming map type.
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    // Neither TO nor FROM set in the incoming type is treated as 'alloc'.
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Select the final map type by the path taken. The ToElseBB edge is the
    // 'tofrom' case and keeps MemberMapType unchanged.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    // Same five-argument shape as the mapper function's own parameters.
    llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  // Second incoming edge of the loop PHI: the back edge from LastBB.
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Record the emitted function so later requests for D reuse it.
  UDMMap.try_emplace(D, Fn);
  // If a CodeGenFunction was supplied, also remember D under the function it
  // is currently emitting.
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
9801 
9802 /// Emit the array initialization or deletion portion for user-defined mapper
9803 /// code generation. First, it evaluates whether an array section is mapped and
9804 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9805 /// true, and \a MapType indicates to not delete this array, array
9806 /// initialization code is generated. If \a IsInit is false, and \a MapType
9807 /// indicates to not this array, array deletion code is generated.
9808 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9809     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9810     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9811     CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
9812   StringRef Prefix = IsInit ? ".init" : ".del";
9813 
9814   // Evaluate if this is an array section.
9815   llvm::BasicBlock *IsDeleteBB =
9816       MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
9817   llvm::BasicBlock *BodyBB =
9818       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9819   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
9820       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9821   MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);
9822 
9823   // Evaluate if we are going to delete this section.
9824   MapperCGF.EmitBlock(IsDeleteBB);
9825   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9826       MapType,
9827       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9828   llvm::Value *DeleteCond;
9829   if (IsInit) {
9830     DeleteCond = MapperCGF.Builder.CreateIsNull(
9831         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9832   } else {
9833     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9834         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9835   }
9836   MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);
9837 
9838   MapperCGF.EmitBlock(BodyBB);
9839   // Get the array size by multiplying element size and element number (i.e., \p
9840   // Size).
9841   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9842       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9843   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9844   // memory allocation/deletion purpose only.
9845   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9846       MapType,
9847       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9848                                    MappableExprsHandler::OMP_MAP_FROM)));
9849   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9850   // data structure.
9851   llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
9852   MapperCGF.EmitRuntimeCall(
9853       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9854                                             OMPRTL___tgt_push_mapper_component),
9855       OffloadingArgs);
9856 }
9857 
9858 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9859     const OMPDeclareMapperDecl *D) {
9860   auto I = UDMMap.find(D);
9861   if (I != UDMMap.end())
9862     return I->second;
9863   emitUserDefinedMapper(D);
9864   return UDMMap.lookup(D);
9865 }
9866 
9867 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9868     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9869     llvm::Value *DeviceID,
9870     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9871                                      const OMPLoopDirective &D)>
9872         SizeEmitter) {
9873   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9874   const OMPExecutableDirective *TD = &D;
9875   // Get nested teams distribute kind directive, if any.
9876   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9877     TD = getNestedDistributeDirective(CGM.getContext(), D);
9878   if (!TD)
9879     return;
9880   const auto *LD = cast<OMPLoopDirective>(TD);
9881   auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
9882                                                          PrePostActionTy &) {
9883     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9884       llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
9885       llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
9886       CGF.EmitRuntimeCall(
9887           OMPBuilder.getOrCreateRuntimeFunction(
9888               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount),
9889           Args);
9890     }
9891   };
9892   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9893 }
9894 
/// Emit the host-side code that launches the target region of directive \p D.
///
/// The function builds four code generators and then picks between them:
///  - ThenGen:       emits the runtime offloading call and, if that call
///                   returns a non-null value, a fallback call to the host
///                   outlined function.
///  - ElseGen:       emits only the call to the host outlined function.
///  - TargetThenGen: builds the map information and offloading arrays for the
///                   captured variables, then runs ThenGen.
///  - TargetElseGen: runs ElseGen.
/// Both Target*Gen wrappers run their inner generator through
/// EmitOMPTargetTaskBasedDirective when the directive has a 'depend' or
/// 'nowait' clause, and through emitInlinedDirective otherwise.
///
/// \param CGF          Function in which the code is emitted; nothing is
///                     emitted if it has no insert point.
/// \param D            The target directive.
/// \param OutlinedFn   Host version of the target region (must be non-null).
/// \param OutlinedFnID Identifier of the target region passed to the runtime;
///                     when null only the host version is executed.
/// \param IfCond       Optional 'if' clause condition selecting between the
///                     offloading path and the host path.
/// \param Device       Device clause expression together with its modifier.
/// \param SizeEmitter  Callback forwarded to emitTargetNumIterationsCall.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // A 'depend' or 'nowait' clause means the region is emitted through the
  // task-based path (EmitOMPTargetTaskBasedDirective) below.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Generate the captured variables once up front, inside an inlined region.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // These are filled in by TargetThenGen and read (by reference) by ThenGen.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
                    &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        // Regenerate the captured variables for the code generation function
        // in use on the task-based path.
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any. Without a device clause expression the
    // OMP_DEVICEID_UNDEF constant is passed instead.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    // 'nowait' selects the *_nowait_mapper variants of the runtime entry
    // points below.
    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {RTLoc,
                                       DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       MapNamesArray,
                                       InputInfo.MappersArray.getPointer(),
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      // No teams information: use the entry points that take neither a team
      // count nor a thread limit.
      llvm::Value *OffloadingArgs[] = {RTLoc,
                                       DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       MapNamesArray,
                                       InputInfo.MappersArray.getPointer()};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required. A
    // non-null return value from the runtime call is treated as failure.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Collect the map information for every capture, materialize the offloading
  // arrays, publish them through InputInfo/MapTypesArray/MapNamesArray and
  // finally run ThenGen.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    // Walk the captures, the fields of the captured record and the generated
    // captured values in lockstep.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        // Record the capture as handled; a capture of 'this' is recorded as a
        // null declaration.
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct);

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, /*NotTargetParams=*/true,
                              MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});

    // Publish the generated arrays to ThenGen, which captured these objects by
    // reference.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-only path: run ElseGen, through the task-based path when required
  // (with a default-constructed, empty OMPTargetDataInfo).
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
10201 
10202 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10203                                                     StringRef ParentName) {
10204   if (!S)
10205     return;
10206 
10207   // Codegen OMP target directives that offload compute to the device.
10208   bool RequiresDeviceCodegen =
10209       isa<OMPExecutableDirective>(S) &&
10210       isOpenMPTargetExecutionDirective(
10211           cast<OMPExecutableDirective>(S)->getDirectiveKind());
10212 
10213   if (RequiresDeviceCodegen) {
10214     const auto &E = *cast<OMPExecutableDirective>(S);
10215     unsigned DeviceID;
10216     unsigned FileID;
10217     unsigned Line;
10218     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
10219                              FileID, Line);
10220 
10221     // Is this a target region that should not be emitted as an entry point? If
10222     // so just signal we are done with this target region.
10223     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
10224                                                             ParentName, Line))
10225       return;
10226 
10227     switch (E.getDirectiveKind()) {
10228     case OMPD_target:
10229       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
10230                                                    cast<OMPTargetDirective>(E));
10231       break;
10232     case OMPD_target_parallel:
10233       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
10234           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
10235       break;
10236     case OMPD_target_teams:
10237       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
10238           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
10239       break;
10240     case OMPD_target_teams_distribute:
10241       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
10242           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
10243       break;
10244     case OMPD_target_teams_distribute_simd:
10245       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
10246           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
10247       break;
10248     case OMPD_target_parallel_for:
10249       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
10250           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
10251       break;
10252     case OMPD_target_parallel_for_simd:
10253       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
10254           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
10255       break;
10256     case OMPD_target_simd:
10257       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
10258           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
10259       break;
10260     case OMPD_target_teams_distribute_parallel_for:
10261       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
10262           CGM, ParentName,
10263           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
10264       break;
10265     case OMPD_target_teams_distribute_parallel_for_simd:
10266       CodeGenFunction::
10267           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
10268               CGM, ParentName,
10269               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
10270       break;
10271     case OMPD_parallel:
10272     case OMPD_for:
10273     case OMPD_parallel_for:
10274     case OMPD_parallel_master:
10275     case OMPD_parallel_sections:
10276     case OMPD_for_simd:
10277     case OMPD_parallel_for_simd:
10278     case OMPD_cancel:
10279     case OMPD_cancellation_point:
10280     case OMPD_ordered:
10281     case OMPD_threadprivate:
10282     case OMPD_allocate:
10283     case OMPD_task:
10284     case OMPD_simd:
10285     case OMPD_sections:
10286     case OMPD_section:
10287     case OMPD_single:
10288     case OMPD_master:
10289     case OMPD_critical:
10290     case OMPD_taskyield:
10291     case OMPD_barrier:
10292     case OMPD_taskwait:
10293     case OMPD_taskgroup:
10294     case OMPD_atomic:
10295     case OMPD_flush:
10296     case OMPD_depobj:
10297     case OMPD_scan:
10298     case OMPD_teams:
10299     case OMPD_target_data:
10300     case OMPD_target_exit_data:
10301     case OMPD_target_enter_data:
10302     case OMPD_distribute:
10303     case OMPD_distribute_simd:
10304     case OMPD_distribute_parallel_for:
10305     case OMPD_distribute_parallel_for_simd:
10306     case OMPD_teams_distribute:
10307     case OMPD_teams_distribute_simd:
10308     case OMPD_teams_distribute_parallel_for:
10309     case OMPD_teams_distribute_parallel_for_simd:
10310     case OMPD_target_update:
10311     case OMPD_declare_simd:
10312     case OMPD_declare_variant:
10313     case OMPD_begin_declare_variant:
10314     case OMPD_end_declare_variant:
10315     case OMPD_declare_target:
10316     case OMPD_end_declare_target:
10317     case OMPD_declare_reduction:
10318     case OMPD_declare_mapper:
10319     case OMPD_taskloop:
10320     case OMPD_taskloop_simd:
10321     case OMPD_master_taskloop:
10322     case OMPD_master_taskloop_simd:
10323     case OMPD_parallel_master_taskloop:
10324     case OMPD_parallel_master_taskloop_simd:
10325     case OMPD_requires:
10326     case OMPD_unknown:
10327     default:
10328       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10329     }
10330     return;
10331   }
10332 
10333   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10334     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10335       return;
10336 
10337     scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10338     return;
10339   }
10340 
10341   // If this is a lambda function, look into its body.
10342   if (const auto *L = dyn_cast<LambdaExpr>(S))
10343     S = L->getBody();
10344 
10345   // Keep looking for target regions recursively.
10346   for (const Stmt *II : S->children())
10347     scanForTargetRegionsFunctions(II, ParentName);
10348 }
10349 
10350 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10351   // If emitting code for the host, we do not process FD here. Instead we do
10352   // the normal code generation.
10353   if (!CGM.getLangOpts().OpenMPIsDevice) {
10354     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
10355       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10356           OMPDeclareTargetDeclAttr::getDeviceType(FD);
10357       // Do not emit device_type(nohost) functions for the host.
10358       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10359         return true;
10360     }
10361     return false;
10362   }
10363 
10364   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10365   // Try to detect target regions in the function.
10366   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10367     StringRef Name = CGM.getMangledName(GD);
10368     scanForTargetRegionsFunctions(FD->getBody(), Name);
10369     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10370         OMPDeclareTargetDeclAttr::getDeviceType(FD);
10371     // Do not emit device_type(nohost) functions for the host.
10372     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10373       return true;
10374   }
10375 
10376   // Do not to emit function if it is not marked as declare target.
10377   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10378          AlreadyEmittedTargetDecls.count(VD) == 0;
10379 }
10380 
10381 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10382   if (!CGM.getLangOpts().OpenMPIsDevice)
10383     return false;
10384 
10385   // Check if there are Ctors/Dtors in this declaration and look for target
10386   // regions in it. We use the complete variant to produce the kernel name
10387   // mangling.
10388   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10389   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10390     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10391       StringRef ParentName =
10392           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10393       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10394     }
10395     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10396       StringRef ParentName =
10397           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10398       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10399     }
10400   }
10401 
10402   // Do not to emit variable if it is not marked as declare target.
10403   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10404       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10405           cast<VarDecl>(GD.getDecl()));
10406   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10407       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10408        HasRequiresUnifiedSharedMemory)) {
10409     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10410     return true;
10411   }
10412   return false;
10413 }
10414 
10415 llvm::Constant *
10416 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
10417                                                 const VarDecl *VD) {
10418   assert(VD->getType().isConstant(CGM.getContext()) &&
10419          "Expected constant variable.");
10420   StringRef VarName;
10421   llvm::Constant *Addr;
10422   llvm::GlobalValue::LinkageTypes Linkage;
10423   QualType Ty = VD->getType();
10424   SmallString<128> Buffer;
10425   {
10426     unsigned DeviceID;
10427     unsigned FileID;
10428     unsigned Line;
10429     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
10430                              FileID, Line);
10431     llvm::raw_svector_ostream OS(Buffer);
10432     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
10433        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
10434     VarName = OS.str();
10435   }
10436   Linkage = llvm::GlobalValue::InternalLinkage;
10437   Addr =
10438       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
10439                                   getDefaultFirstprivateAddressSpace());
10440   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
10441   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
10442   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
10443   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10444       VarName, Addr, VarSize,
10445       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
10446   return Addr;
10447 }
10448 
10449 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10450                                                    llvm::Constant *Addr) {
10451   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10452       !CGM.getLangOpts().OpenMPIsDevice)
10453     return;
10454   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10455       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10456   if (!Res) {
10457     if (CGM.getLangOpts().OpenMPIsDevice) {
10458       // Register non-target variables being emitted in device code (debug info
10459       // may cause this).
10460       StringRef VarName = CGM.getMangledName(VD);
10461       EmittedNonTargetVariables.try_emplace(VarName, Addr);
10462     }
10463     return;
10464   }
10465   // Register declare target variables.
10466   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10467   StringRef VarName;
10468   CharUnits VarSize;
10469   llvm::GlobalValue::LinkageTypes Linkage;
10470 
10471   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10472       !HasRequiresUnifiedSharedMemory) {
10473     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10474     VarName = CGM.getMangledName(VD);
10475     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10476       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10477       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10478     } else {
10479       VarSize = CharUnits::Zero();
10480     }
10481     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10482     // Temp solution to prevent optimizations of the internal variables.
10483     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10484       std::string RefName = getName({VarName, "ref"});
10485       if (!CGM.GetGlobalValue(RefName)) {
10486         llvm::Constant *AddrRef =
10487             getOrCreateInternalVariable(Addr->getType(), RefName);
10488         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10489         GVAddrRef->setConstant(/*Val=*/true);
10490         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10491         GVAddrRef->setInitializer(Addr);
10492         CGM.addCompilerUsedGlobal(GVAddrRef);
10493       }
10494     }
10495   } else {
10496     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10497             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10498              HasRequiresUnifiedSharedMemory)) &&
10499            "Declare target attribute must link or to with unified memory.");
10500     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10501       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10502     else
10503       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10504 
10505     if (CGM.getLangOpts().OpenMPIsDevice) {
10506       VarName = Addr->getName();
10507       Addr = nullptr;
10508     } else {
10509       VarName = getAddrOfDeclareTargetVar(VD).getName();
10510       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10511     }
10512     VarSize = CGM.getPointerSize();
10513     Linkage = llvm::GlobalValue::WeakAnyLinkage;
10514   }
10515 
10516   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10517       VarName, Addr, VarSize, Flags, Linkage);
10518 }
10519 
10520 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10521   if (isa<FunctionDecl>(GD.getDecl()) ||
10522       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10523     return emitTargetFunctions(GD);
10524 
10525   return emitTargetGlobalVariable(GD);
10526 }
10527 
10528 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10529   for (const VarDecl *VD : DeferredGlobalVariables) {
10530     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10531         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10532     if (!Res)
10533       continue;
10534     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10535         !HasRequiresUnifiedSharedMemory) {
10536       CGM.EmitGlobal(VD);
10537     } else {
10538       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10539               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10540                HasRequiresUnifiedSharedMemory)) &&
10541              "Expected link clause or to clause with unified memory.");
10542       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10543     }
10544   }
10545 }
10546 
10547 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10548     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10549   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10550          " Expected target-based directive.");
10551 }
10552 
10553 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10554   for (const OMPClause *Clause : D->clauselists()) {
10555     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10556       HasRequiresUnifiedSharedMemory = true;
10557     } else if (const auto *AC =
10558                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10559       switch (AC->getAtomicDefaultMemOrderKind()) {
10560       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10561         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10562         break;
10563       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10564         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10565         break;
10566       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10567         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10568         break;
10569       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10570         break;
10571       }
10572     }
10573   }
10574 }
10575 
/// Return the atomic ordering currently stored in RequiresAtomicOrdering
/// (updated by processRequiresDirective() when an 'atomic_default_mem_order'
/// clause is seen).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10579 
10580 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10581                                                        LangAS &AS) {
10582   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10583     return false;
10584   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10585   switch(A->getAllocatorType()) {
10586   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10587   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10588   // Not supported, fallback to the default mem space.
10589   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10590   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10591   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10592   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10593   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10594   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10595   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10596     AS = LangAS::Default;
10597     return true;
10598   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10599     llvm_unreachable("Expected predefined allocator for the variables with the "
10600                      "static storage.");
10601   }
10602   return false;
10603 }
10604 
/// Return true if a 'requires' declaration with a 'unified_shared_memory'
/// clause has been processed (see processRequiresDirective()).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
10608 
10609 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10610     CodeGenModule &CGM)
10611     : CGM(CGM) {
10612   if (CGM.getLangOpts().OpenMPIsDevice) {
10613     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10614     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10615   }
10616 }
10617 
10618 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10619   if (CGM.getLangOpts().OpenMPIsDevice)
10620     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10621 }
10622 
10623 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10624   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10625     return true;
10626 
10627   const auto *D = cast<FunctionDecl>(GD.getDecl());
10628   // Do not to emit function if it is marked as declare target as it was already
10629   // emitted.
10630   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10631     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10632       if (auto *F = dyn_cast_or_null<llvm::Function>(
10633               CGM.GetGlobalValue(CGM.getMangledName(GD))))
10634         return !F->isDeclaration();
10635       return false;
10636     }
10637     return true;
10638   }
10639 
10640   return !AlreadyEmittedTargetDecls.insert(D).second;
10641 }
10642 
10643 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10644   // If we don't have entries or if we are emitting code for the device, we
10645   // don't need to do anything.
10646   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10647       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
10648       (OffloadEntriesInfoManager.empty() &&
10649        !HasEmittedDeclareTargetRegion &&
10650        !HasEmittedTargetRegion))
10651     return nullptr;
10652 
10653   // Create and register the function that handles the requires directives.
10654   ASTContext &C = CGM.getContext();
10655 
10656   llvm::Function *RequiresRegFn;
10657   {
10658     CodeGenFunction CGF(CGM);
10659     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
10660     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
10661     std::string ReqName = getName({"omp_offloading", "requires_reg"});
10662     RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
10663     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
10664     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
10665     // TODO: check for other requires clauses.
10666     // The requires directive takes effect only when a target region is
10667     // present in the compilation unit. Otherwise it is ignored and not
10668     // passed to the runtime. This avoids the runtime from throwing an error
10669     // for mismatching requires clauses across compilation units that don't
10670     // contain at least 1 target region.
10671     assert((HasEmittedTargetRegion ||
10672             HasEmittedDeclareTargetRegion ||
10673             !OffloadEntriesInfoManager.empty()) &&
10674            "Target or declare target region expected.");
10675     if (HasRequiresUnifiedSharedMemory)
10676       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
10677     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10678                             CGM.getModule(), OMPRTL___tgt_register_requires),
10679                         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
10680     CGF.FinishFunction();
10681   }
10682   return RequiresRegFn;
10683 }
10684 
10685 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10686                                     const OMPExecutableDirective &D,
10687                                     SourceLocation Loc,
10688                                     llvm::Function *OutlinedFn,
10689                                     ArrayRef<llvm::Value *> CapturedVars) {
10690   if (!CGF.HaveInsertPoint())
10691     return;
10692 
10693   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10694   CodeGenFunction::RunCleanupsScope Scope(CGF);
10695 
10696   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10697   llvm::Value *Args[] = {
10698       RTLoc,
10699       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10700       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10701   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10702   RealArgs.append(std::begin(Args), std::end(Args));
10703   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10704 
10705   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10706       CGM.getModule(), OMPRTL___kmpc_fork_teams);
10707   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10708 }
10709 
10710 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10711                                          const Expr *NumTeams,
10712                                          const Expr *ThreadLimit,
10713                                          SourceLocation Loc) {
10714   if (!CGF.HaveInsertPoint())
10715     return;
10716 
10717   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10718 
10719   llvm::Value *NumTeamsVal =
10720       NumTeams
10721           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10722                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10723           : CGF.Builder.getInt32(0);
10724 
10725   llvm::Value *ThreadLimitVal =
10726       ThreadLimit
10727           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10728                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10729           : CGF.Builder.getInt32(0);
10730 
10731   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10732   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10733                                      ThreadLimitVal};
10734   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10735                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10736                       PushNumTeamsArgs);
10737 }
10738 
10739 void CGOpenMPRuntime::emitTargetDataCalls(
10740     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10741     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
10742   if (!CGF.HaveInsertPoint())
10743     return;
10744 
10745   // Action used to replace the default codegen action and turn privatization
10746   // off.
10747   PrePostActionTy NoPrivAction;
10748 
10749   // Generate the code for the opening of the data environment. Capture all the
10750   // arguments of the runtime call by reference because they are used in the
10751   // closing of the region.
10752   auto &&BeginThenGen = [this, &D, Device, &Info,
10753                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
10754     // Fill up the arrays with all the mapped variables.
10755     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10756 
10757     // Get map clause information.
10758     MappableExprsHandler MEHandler(D, CGF);
10759     MEHandler.generateAllInfo(CombinedInfo);
10760 
10761     // Fill up the arrays and create the arguments.
10762     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
10763                          /*IsNonContiguous=*/true);
10764 
10765     llvm::Value *BasePointersArrayArg = nullptr;
10766     llvm::Value *PointersArrayArg = nullptr;
10767     llvm::Value *SizesArrayArg = nullptr;
10768     llvm::Value *MapTypesArrayArg = nullptr;
10769     llvm::Value *MapNamesArrayArg = nullptr;
10770     llvm::Value *MappersArrayArg = nullptr;
10771     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
10772                                  SizesArrayArg, MapTypesArrayArg,
10773                                  MapNamesArrayArg, MappersArrayArg, Info);
10774 
10775     // Emit device ID if any.
10776     llvm::Value *DeviceID = nullptr;
10777     if (Device) {
10778       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10779                                            CGF.Int64Ty, /*isSigned=*/true);
10780     } else {
10781       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10782     }
10783 
10784     // Emit the number of elements in the offloading arrays.
10785     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10786     //
10787     // Source location for the ident struct
10788     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10789 
10790     llvm::Value *OffloadingArgs[] = {RTLoc,
10791                                      DeviceID,
10792                                      PointerNum,
10793                                      BasePointersArrayArg,
10794                                      PointersArrayArg,
10795                                      SizesArrayArg,
10796                                      MapTypesArrayArg,
10797                                      MapNamesArrayArg,
10798                                      MappersArrayArg};
10799     CGF.EmitRuntimeCall(
10800         OMPBuilder.getOrCreateRuntimeFunction(
10801             CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
10802         OffloadingArgs);
10803 
10804     // If device pointer privatization is required, emit the body of the region
10805     // here. It will have to be duplicated: with and without privatization.
10806     if (!Info.CaptureDeviceAddrMap.empty())
10807       CodeGen(CGF);
10808   };
10809 
10810   // Generate code for the closing of the data region.
10811   auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
10812                                                 PrePostActionTy &) {
10813     assert(Info.isValid() && "Invalid data environment closing arguments.");
10814 
10815     llvm::Value *BasePointersArrayArg = nullptr;
10816     llvm::Value *PointersArrayArg = nullptr;
10817     llvm::Value *SizesArrayArg = nullptr;
10818     llvm::Value *MapTypesArrayArg = nullptr;
10819     llvm::Value *MapNamesArrayArg = nullptr;
10820     llvm::Value *MappersArrayArg = nullptr;
10821     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
10822                                  SizesArrayArg, MapTypesArrayArg,
10823                                  MapNamesArrayArg, MappersArrayArg, Info,
10824                                  {/*ForEndCall=*/true});
10825 
10826     // Emit device ID if any.
10827     llvm::Value *DeviceID = nullptr;
10828     if (Device) {
10829       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10830                                            CGF.Int64Ty, /*isSigned=*/true);
10831     } else {
10832       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10833     }
10834 
10835     // Emit the number of elements in the offloading arrays.
10836     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10837 
10838     // Source location for the ident struct
10839     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10840 
10841     llvm::Value *OffloadingArgs[] = {RTLoc,
10842                                      DeviceID,
10843                                      PointerNum,
10844                                      BasePointersArrayArg,
10845                                      PointersArrayArg,
10846                                      SizesArrayArg,
10847                                      MapTypesArrayArg,
10848                                      MapNamesArrayArg,
10849                                      MappersArrayArg};
10850     CGF.EmitRuntimeCall(
10851         OMPBuilder.getOrCreateRuntimeFunction(
10852             CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
10853         OffloadingArgs);
10854   };
10855 
10856   // If we need device pointer privatization, we need to emit the body of the
10857   // region with no privatization in the 'else' branch of the conditional.
10858   // Otherwise, we don't have to do anything.
10859   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
10860                                                          PrePostActionTy &) {
10861     if (!Info.CaptureDeviceAddrMap.empty()) {
10862       CodeGen.setAction(NoPrivAction);
10863       CodeGen(CGF);
10864     }
10865   };
10866 
10867   // We don't have to do anything to close the region if the if clause evaluates
10868   // to false.
10869   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
10870 
10871   if (IfCond) {
10872     emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
10873   } else {
10874     RegionCodeGenTy RCG(BeginThenGen);
10875     RCG(CGF);
10876   }
10877 
10878   // If we don't require privatization of device pointers, we emit the body in
10879   // between the runtime calls. This avoids duplicating the body code.
10880   if (Info.CaptureDeviceAddrMap.empty()) {
10881     CodeGen.setAction(NoPrivAction);
10882     CodeGen(CGF);
10883   }
10884 
10885   if (IfCond) {
10886     emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
10887   } else {
10888     RegionCodeGenTy RCG(EndThenGen);
10889     RCG(CGF);
10890   }
10891 }
10892 
10893 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10894     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10895     const Expr *Device) {
10896   if (!CGF.HaveInsertPoint())
10897     return;
10898 
10899   assert((isa<OMPTargetEnterDataDirective>(D) ||
10900           isa<OMPTargetExitDataDirective>(D) ||
10901           isa<OMPTargetUpdateDirective>(D)) &&
10902          "Expecting either target enter, exit data, or update directives.");
10903 
10904   CodeGenFunction::OMPTargetDataInfo InputInfo;
10905   llvm::Value *MapTypesArray = nullptr;
10906   llvm::Value *MapNamesArray = nullptr;
10907   // Generate the code for the opening of the data environment.
10908   auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
10909                     &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10910     // Emit device ID if any.
10911     llvm::Value *DeviceID = nullptr;
10912     if (Device) {
10913       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10914                                            CGF.Int64Ty, /*isSigned=*/true);
10915     } else {
10916       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10917     }
10918 
10919     // Emit the number of elements in the offloading arrays.
10920     llvm::Constant *PointerNum =
10921         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10922 
10923     // Source location for the ident struct
10924     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10925 
10926     llvm::Value *OffloadingArgs[] = {RTLoc,
10927                                      DeviceID,
10928                                      PointerNum,
10929                                      InputInfo.BasePointersArray.getPointer(),
10930                                      InputInfo.PointersArray.getPointer(),
10931                                      InputInfo.SizesArray.getPointer(),
10932                                      MapTypesArray,
10933                                      MapNamesArray,
10934                                      InputInfo.MappersArray.getPointer()};
10935 
10936     // Select the right runtime function call for each standalone
10937     // directive.
10938     const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10939     RuntimeFunction RTLFn;
10940     switch (D.getDirectiveKind()) {
10941     case OMPD_target_enter_data:
10942       RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
10943                         : OMPRTL___tgt_target_data_begin_mapper;
10944       break;
10945     case OMPD_target_exit_data:
10946       RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
10947                         : OMPRTL___tgt_target_data_end_mapper;
10948       break;
10949     case OMPD_target_update:
10950       RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
10951                         : OMPRTL___tgt_target_data_update_mapper;
10952       break;
10953     case OMPD_parallel:
10954     case OMPD_for:
10955     case OMPD_parallel_for:
10956     case OMPD_parallel_master:
10957     case OMPD_parallel_sections:
10958     case OMPD_for_simd:
10959     case OMPD_parallel_for_simd:
10960     case OMPD_cancel:
10961     case OMPD_cancellation_point:
10962     case OMPD_ordered:
10963     case OMPD_threadprivate:
10964     case OMPD_allocate:
10965     case OMPD_task:
10966     case OMPD_simd:
10967     case OMPD_sections:
10968     case OMPD_section:
10969     case OMPD_single:
10970     case OMPD_master:
10971     case OMPD_critical:
10972     case OMPD_taskyield:
10973     case OMPD_barrier:
10974     case OMPD_taskwait:
10975     case OMPD_taskgroup:
10976     case OMPD_atomic:
10977     case OMPD_flush:
10978     case OMPD_depobj:
10979     case OMPD_scan:
10980     case OMPD_teams:
10981     case OMPD_target_data:
10982     case OMPD_distribute:
10983     case OMPD_distribute_simd:
10984     case OMPD_distribute_parallel_for:
10985     case OMPD_distribute_parallel_for_simd:
10986     case OMPD_teams_distribute:
10987     case OMPD_teams_distribute_simd:
10988     case OMPD_teams_distribute_parallel_for:
10989     case OMPD_teams_distribute_parallel_for_simd:
10990     case OMPD_declare_simd:
10991     case OMPD_declare_variant:
10992     case OMPD_begin_declare_variant:
10993     case OMPD_end_declare_variant:
10994     case OMPD_declare_target:
10995     case OMPD_end_declare_target:
10996     case OMPD_declare_reduction:
10997     case OMPD_declare_mapper:
10998     case OMPD_taskloop:
10999     case OMPD_taskloop_simd:
11000     case OMPD_master_taskloop:
11001     case OMPD_master_taskloop_simd:
11002     case OMPD_parallel_master_taskloop:
11003     case OMPD_parallel_master_taskloop_simd:
11004     case OMPD_target:
11005     case OMPD_target_simd:
11006     case OMPD_target_teams_distribute:
11007     case OMPD_target_teams_distribute_simd:
11008     case OMPD_target_teams_distribute_parallel_for:
11009     case OMPD_target_teams_distribute_parallel_for_simd:
11010     case OMPD_target_teams:
11011     case OMPD_target_parallel:
11012     case OMPD_target_parallel_for:
11013     case OMPD_target_parallel_for_simd:
11014     case OMPD_requires:
11015     case OMPD_unknown:
11016     default:
11017       llvm_unreachable("Unexpected standalone target data directive.");
11018       break;
11019     }
11020     CGF.EmitRuntimeCall(
11021         OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
11022         OffloadingArgs);
11023   };
11024 
11025   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
11026                           &MapNamesArray](CodeGenFunction &CGF,
11027                                           PrePostActionTy &) {
11028     // Fill up the arrays with all the mapped variables.
11029     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11030 
11031     // Get map clause information.
11032     MappableExprsHandler MEHandler(D, CGF);
11033     MEHandler.generateAllInfo(CombinedInfo);
11034 
11035     TargetDataInfo Info;
11036     // Fill up the arrays and create the arguments.
11037     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11038                          /*IsNonContiguous=*/true);
11039     bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
11040                              D.hasClausesOfKind<OMPNowaitClause>();
11041     emitOffloadingArraysArgument(
11042         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
11043         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
11044         {/*ForEndTask=*/false});
11045     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
11046     InputInfo.BasePointersArray =
11047         Address(Info.BasePointersArray, CGM.getPointerAlign());
11048     InputInfo.PointersArray =
11049         Address(Info.PointersArray, CGM.getPointerAlign());
11050     InputInfo.SizesArray =
11051         Address(Info.SizesArray, CGM.getPointerAlign());
11052     InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
11053     MapTypesArray = Info.MapTypesArray;
11054     MapNamesArray = Info.MapNamesArray;
11055     if (RequiresOuterTask)
11056       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
11057     else
11058       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
11059   };
11060 
11061   if (IfCond) {
11062     emitIfClause(CGF, IfCond, TargetThenGen,
11063                  [](CodeGenFunction &CGF, PrePostActionTy &) {});
11064   } else {
11065     RegionCodeGenTy ThenRCG(TargetThenGen);
11066     ThenRCG(CGF);
11067   }
11068 }
11069 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  ///
  /// The enumerator order matters: code in this file value-initializes a
  /// ParamKindTy with `{}`, which yields the first enumerator
  /// (LinearWithVarStride).
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Classification of the parameter; Vector unless set otherwise.
    ParamKindTy Kind = Vector;
    /// Value printed by the name manglers after the linear markers.
    /// NOTE(review): judging by the name, this is the constant stride for
    /// Linear and a reference to the stride argument for LinearWithVarStride
    /// -- confirm against the code that fills it in.
    llvm::APSInt StrideOrArg;
    /// Alignment of the parameter; the x86 mangler prints it only when it is
    /// non-zero.
    llvm::APSInt Alignment;
  };
} // namespace
11080 
11081 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11082                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11083   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11084   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11085   // of that clause. The VLEN value must be power of 2.
11086   // In other case the notion of the function`s "characteristic data type" (CDT)
11087   // is used to compute the vector length.
11088   // CDT is defined in the following order:
11089   //   a) For non-void function, the CDT is the return type.
11090   //   b) If the function has any non-uniform, non-linear parameters, then the
11091   //   CDT is the type of the first such parameter.
11092   //   c) If the CDT determined by a) or b) above is struct, union, or class
11093   //   type which is pass-by-value (except for the type that maps to the
11094   //   built-in complex data type), the characteristic data type is int.
11095   //   d) If none of the above three cases is applicable, the CDT is int.
11096   // The VLEN is then determined based on the CDT and the size of vector
11097   // register of that ISA for which current vector version is generated. The
11098   // VLEN is computed using the formula below:
11099   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11100   // where vector register size specified in section 3.2.1 Registers and the
11101   // Stack Frame of original AMD64 ABI document.
11102   QualType RetType = FD->getReturnType();
11103   if (RetType.isNull())
11104     return 0;
11105   ASTContext &C = FD->getASTContext();
11106   QualType CDT;
11107   if (!RetType.isNull() && !RetType->isVoidType()) {
11108     CDT = RetType;
11109   } else {
11110     unsigned Offset = 0;
11111     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11112       if (ParamAttrs[Offset].Kind == Vector)
11113         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11114       ++Offset;
11115     }
11116     if (CDT.isNull()) {
11117       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11118         if (ParamAttrs[I + Offset].Kind == Vector) {
11119           CDT = FD->getParamDecl(I)->getType();
11120           break;
11121         }
11122       }
11123     }
11124   }
11125   if (CDT.isNull())
11126     CDT = C.IntTy;
11127   CDT = CDT->getCanonicalTypeUnqualified();
11128   if (CDT->isRecordType() || CDT->isUnionType())
11129     CDT = C.IntTy;
11130   return C.getTypeSize(CDT);
11131 }
11132 
11133 static void
11134 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11135                            const llvm::APSInt &VLENVal,
11136                            ArrayRef<ParamAttrTy> ParamAttrs,
11137                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11138   struct ISADataTy {
11139     char ISA;
11140     unsigned VecRegSize;
11141   };
11142   ISADataTy ISAData[] = {
11143       {
11144           'b', 128
11145       }, // SSE
11146       {
11147           'c', 256
11148       }, // AVX
11149       {
11150           'd', 256
11151       }, // AVX2
11152       {
11153           'e', 512
11154       }, // AVX512
11155   };
11156   llvm::SmallVector<char, 2> Masked;
11157   switch (State) {
11158   case OMPDeclareSimdDeclAttr::BS_Undefined:
11159     Masked.push_back('N');
11160     Masked.push_back('M');
11161     break;
11162   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11163     Masked.push_back('N');
11164     break;
11165   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11166     Masked.push_back('M');
11167     break;
11168   }
11169   for (char Mask : Masked) {
11170     for (const ISADataTy &Data : ISAData) {
11171       SmallString<256> Buffer;
11172       llvm::raw_svector_ostream Out(Buffer);
11173       Out << "_ZGV" << Data.ISA << Mask;
11174       if (!VLENVal) {
11175         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11176         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11177         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11178       } else {
11179         Out << VLENVal;
11180       }
11181       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11182         switch (ParamAttr.Kind){
11183         case LinearWithVarStride:
11184           Out << 's' << ParamAttr.StrideOrArg;
11185           break;
11186         case Linear:
11187           Out << 'l';
11188           if (ParamAttr.StrideOrArg != 1)
11189             Out << ParamAttr.StrideOrArg;
11190           break;
11191         case Uniform:
11192           Out << 'u';
11193           break;
11194         case Vector:
11195           Out << 'v';
11196           break;
11197         }
11198         if (!!ParamAttr.Alignment)
11199           Out << 'a' << ParamAttr.Alignment;
11200       }
11201       Out << '_' << Fn->getName();
11202       Fn->addFnAttr(Out.str());
11203     }
11204   }
11205 }
11206 
// These are the functions needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11212 
11213 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11214 ///
11215 /// TODO: Need to implement the behavior for reference marked with a
11216 /// var or no linear modifiers (1.b in the section). For this, we
11217 /// need to extend ParamKindTy to support the linear modifiers.
11218 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11219   QT = QT.getCanonicalType();
11220 
11221   if (QT->isVoidType())
11222     return false;
11223 
11224   if (Kind == ParamKindTy::Uniform)
11225     return false;
11226 
11227   if (Kind == ParamKindTy::Linear)
11228     return false;
11229 
11230   // TODO: Handle linear references with modifiers
11231 
11232   if (Kind == ParamKindTy::LinearWithVarStride)
11233     return false;
11234 
11235   return true;
11236 }
11237 
11238 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11239 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11240   QT = QT.getCanonicalType();
11241   unsigned Size = C.getTypeSize(QT);
11242 
11243   // Only scalars and complex within 16 bytes wide set PVB to true.
11244   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11245     return false;
11246 
11247   if (QT->isFloatingType())
11248     return true;
11249 
11250   if (QT->isIntegerType())
11251     return true;
11252 
11253   if (QT->isPointerType())
11254     return true;
11255 
11256   // TODO: Add support for complex types (section 3.1.2, item 2).
11257 
11258   return false;
11259 }
11260 
11261 /// Computes the lane size (LS) of a return type or of an input parameter,
11262 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11263 /// TODO: Add support for references, section 3.2.1, item 1.
11264 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11265   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11266     QualType PTy = QT.getCanonicalType()->getPointeeType();
11267     if (getAArch64PBV(PTy, C))
11268       return C.getTypeSize(PTy);
11269   }
11270   if (getAArch64PBV(QT, C))
11271     return C.getTypeSize(QT);
11272 
11273   return C.getTypeSize(C.getUIntPtrType());
11274 }
11275 
11276 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11277 // signature of the scalar function, as defined in 3.2.2 of the
11278 // AAVFABI.
11279 static std::tuple<unsigned, unsigned, bool>
11280 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11281   QualType RetType = FD->getReturnType().getCanonicalType();
11282 
11283   ASTContext &C = FD->getASTContext();
11284 
11285   bool OutputBecomesInput = false;
11286 
11287   llvm::SmallVector<unsigned, 8> Sizes;
11288   if (!RetType->isVoidType()) {
11289     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11290     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11291       OutputBecomesInput = true;
11292   }
11293   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11294     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11295     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11296   }
11297 
11298   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11299   // The LS of a function parameter / return value can only be a power
11300   // of 2, starting from 8 bits, up to 128.
11301   assert(std::all_of(Sizes.begin(), Sizes.end(),
11302                      [](unsigned Size) {
11303                        return Size == 8 || Size == 16 || Size == 32 ||
11304                               Size == 64 || Size == 128;
11305                      }) &&
11306          "Invalid size");
11307 
11308   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11309                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11310                          OutputBecomesInput);
11311 }
11312 
11313 /// Mangle the parameter part of the vector function name according to
11314 /// their OpenMP classification. The mangling function is defined in
11315 /// section 3.5 of the AAVFABI.
11316 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11317   SmallString<256> Buffer;
11318   llvm::raw_svector_ostream Out(Buffer);
11319   for (const auto &ParamAttr : ParamAttrs) {
11320     switch (ParamAttr.Kind) {
11321     case LinearWithVarStride:
11322       Out << "ls" << ParamAttr.StrideOrArg;
11323       break;
11324     case Linear:
11325       Out << 'l';
11326       // Don't print the step value if it is not present or if it is
11327       // equal to 1.
11328       if (ParamAttr.StrideOrArg != 1)
11329         Out << ParamAttr.StrideOrArg;
11330       break;
11331     case Uniform:
11332       Out << 'u';
11333       break;
11334     case Vector:
11335       Out << 'v';
11336       break;
11337     }
11338 
11339     if (!!ParamAttr.Alignment)
11340       Out << 'a' << ParamAttr.Alignment;
11341   }
11342 
11343   return std::string(Out.str());
11344 }
11345 
11346 // Function used to add the attribute. The parameter `VLEN` is
11347 // templated to allow the use of "x" when targeting scalable functions
11348 // for SVE.
11349 template <typename T>
11350 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11351                                  char ISA, StringRef ParSeq,
11352                                  StringRef MangledName, bool OutputBecomesInput,
11353                                  llvm::Function *Fn) {
11354   SmallString<256> Buffer;
11355   llvm::raw_svector_ostream Out(Buffer);
11356   Out << Prefix << ISA << LMask << VLEN;
11357   if (OutputBecomesInput)
11358     Out << "v";
11359   Out << ParSeq << "_" << MangledName;
11360   Fn->addFnAttr(Out.str());
11361 }
11362 
11363 // Helper function to generate the Advanced SIMD names depending on
11364 // the value of the NDS when simdlen is not present.
11365 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11366                                       StringRef Prefix, char ISA,
11367                                       StringRef ParSeq, StringRef MangledName,
11368                                       bool OutputBecomesInput,
11369                                       llvm::Function *Fn) {
11370   switch (NDS) {
11371   case 8:
11372     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11373                          OutputBecomesInput, Fn);
11374     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11375                          OutputBecomesInput, Fn);
11376     break;
11377   case 16:
11378     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11379                          OutputBecomesInput, Fn);
11380     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11381                          OutputBecomesInput, Fn);
11382     break;
11383   case 32:
11384     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11385                          OutputBecomesInput, Fn);
11386     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11387                          OutputBecomesInput, Fn);
11388     break;
11389   case 64:
11390   case 128:
11391     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11392                          OutputBecomesInput, Fn);
11393     break;
11394   default:
11395     llvm_unreachable("Scalar type is too wide.");
11396   }
11397 }
11398 
11399 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11400 static void emitAArch64DeclareSimdFunction(
11401     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11402     ArrayRef<ParamAttrTy> ParamAttrs,
11403     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11404     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11405 
11406   // Get basic data for building the vector signature.
11407   const auto Data = getNDSWDS(FD, ParamAttrs);
11408   const unsigned NDS = std::get<0>(Data);
11409   const unsigned WDS = std::get<1>(Data);
11410   const bool OutputBecomesInput = std::get<2>(Data);
11411 
11412   // Check the values provided via `simdlen` by the user.
11413   // 1. A `simdlen(1)` doesn't produce vector signatures,
11414   if (UserVLEN == 1) {
11415     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11416         DiagnosticsEngine::Warning,
11417         "The clause simdlen(1) has no effect when targeting aarch64.");
11418     CGM.getDiags().Report(SLoc, DiagID);
11419     return;
11420   }
11421 
11422   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11423   // Advanced SIMD output.
11424   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11425     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11426         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11427                                     "power of 2 when targeting Advanced SIMD.");
11428     CGM.getDiags().Report(SLoc, DiagID);
11429     return;
11430   }
11431 
11432   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
11433   // limits.
11434   if (ISA == 's' && UserVLEN != 0) {
11435     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11436       unsigned DiagID = CGM.getDiags().getCustomDiagID(
11437           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11438                                       "lanes in the architectural constraints "
11439                                       "for SVE (min is 128-bit, max is "
11440                                       "2048-bit, by steps of 128-bit)");
11441       CGM.getDiags().Report(SLoc, DiagID) << WDS;
11442       return;
11443     }
11444   }
11445 
11446   // Sort out parameter sequence.
11447   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11448   StringRef Prefix = "_ZGV";
11449   // Generate simdlen from user input (if any).
11450   if (UserVLEN) {
11451     if (ISA == 's') {
11452       // SVE generates only a masked function.
11453       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11454                            OutputBecomesInput, Fn);
11455     } else {
11456       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11457       // Advanced SIMD generates one or two functions, depending on
11458       // the `[not]inbranch` clause.
11459       switch (State) {
11460       case OMPDeclareSimdDeclAttr::BS_Undefined:
11461         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11462                              OutputBecomesInput, Fn);
11463         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11464                              OutputBecomesInput, Fn);
11465         break;
11466       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11467         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11468                              OutputBecomesInput, Fn);
11469         break;
11470       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11471         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11472                              OutputBecomesInput, Fn);
11473         break;
11474       }
11475     }
11476   } else {
11477     // If no user simdlen is provided, follow the AAVFABI rules for
11478     // generating the vector length.
11479     if (ISA == 's') {
11480       // SVE, section 3.4.1, item 1.
11481       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11482                            OutputBecomesInput, Fn);
11483     } else {
11484       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11485       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11486       // two vector names depending on the use of the clause
11487       // `[not]inbranch`.
11488       switch (State) {
11489       case OMPDeclareSimdDeclAttr::BS_Undefined:
11490         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11491                                   OutputBecomesInput, Fn);
11492         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11493                                   OutputBecomesInput, Fn);
11494         break;
11495       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11496         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11497                                   OutputBecomesInput, Fn);
11498         break;
11499       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11500         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11501                                   OutputBecomesInput, Fn);
11502         break;
11503       }
11504     }
11505   }
11506 }
11507 
11508 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11509                                               llvm::Function *Fn) {
11510   ASTContext &C = CGM.getContext();
11511   FD = FD->getMostRecentDecl();
11512   // Map params to their positions in function decl.
11513   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11514   if (isa<CXXMethodDecl>(FD))
11515     ParamPositions.try_emplace(FD, 0);
11516   unsigned ParamPos = ParamPositions.size();
11517   for (const ParmVarDecl *P : FD->parameters()) {
11518     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11519     ++ParamPos;
11520   }
11521   while (FD) {
11522     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11523       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11524       // Mark uniform parameters.
11525       for (const Expr *E : Attr->uniforms()) {
11526         E = E->IgnoreParenImpCasts();
11527         unsigned Pos;
11528         if (isa<CXXThisExpr>(E)) {
11529           Pos = ParamPositions[FD];
11530         } else {
11531           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11532                                 ->getCanonicalDecl();
11533           Pos = ParamPositions[PVD];
11534         }
11535         ParamAttrs[Pos].Kind = Uniform;
11536       }
11537       // Get alignment info.
11538       auto NI = Attr->alignments_begin();
11539       for (const Expr *E : Attr->aligneds()) {
11540         E = E->IgnoreParenImpCasts();
11541         unsigned Pos;
11542         QualType ParmTy;
11543         if (isa<CXXThisExpr>(E)) {
11544           Pos = ParamPositions[FD];
11545           ParmTy = E->getType();
11546         } else {
11547           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11548                                 ->getCanonicalDecl();
11549           Pos = ParamPositions[PVD];
11550           ParmTy = PVD->getType();
11551         }
11552         ParamAttrs[Pos].Alignment =
11553             (*NI)
11554                 ? (*NI)->EvaluateKnownConstInt(C)
11555                 : llvm::APSInt::getUnsigned(
11556                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11557                           .getQuantity());
11558         ++NI;
11559       }
11560       // Mark linear parameters.
11561       auto SI = Attr->steps_begin();
11562       auto MI = Attr->modifiers_begin();
11563       for (const Expr *E : Attr->linears()) {
11564         E = E->IgnoreParenImpCasts();
11565         unsigned Pos;
11566         // Rescaling factor needed to compute the linear parameter
11567         // value in the mangled name.
11568         unsigned PtrRescalingFactor = 1;
11569         if (isa<CXXThisExpr>(E)) {
11570           Pos = ParamPositions[FD];
11571         } else {
11572           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11573                                 ->getCanonicalDecl();
11574           Pos = ParamPositions[PVD];
11575           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11576             PtrRescalingFactor = CGM.getContext()
11577                                      .getTypeSizeInChars(P->getPointeeType())
11578                                      .getQuantity();
11579         }
11580         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11581         ParamAttr.Kind = Linear;
11582         // Assuming a stride of 1, for `linear` without modifiers.
11583         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11584         if (*SI) {
11585           Expr::EvalResult Result;
11586           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11587             if (const auto *DRE =
11588                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11589               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
11590                 ParamAttr.Kind = LinearWithVarStride;
11591                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
11592                     ParamPositions[StridePVD->getCanonicalDecl()]);
11593               }
11594             }
11595           } else {
11596             ParamAttr.StrideOrArg = Result.Val.getInt();
11597           }
11598         }
11599         // If we are using a linear clause on a pointer, we need to
11600         // rescale the value of linear_step with the byte size of the
11601         // pointee type.
11602         if (Linear == ParamAttr.Kind)
11603           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11604         ++SI;
11605         ++MI;
11606       }
11607       llvm::APSInt VLENVal;
11608       SourceLocation ExprLoc;
11609       const Expr *VLENExpr = Attr->getSimdlen();
11610       if (VLENExpr) {
11611         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11612         ExprLoc = VLENExpr->getExprLoc();
11613       }
11614       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11615       if (CGM.getTriple().isX86()) {
11616         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11617       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11618         unsigned VLEN = VLENVal.getExtValue();
11619         StringRef MangledName = Fn->getName();
11620         if (CGM.getTarget().hasFeature("sve"))
11621           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11622                                          MangledName, 's', 128, Fn, ExprLoc);
11623         if (CGM.getTarget().hasFeature("neon"))
11624           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11625                                          MangledName, 'n', 128, Fn, ExprLoc);
11626       }
11627     }
11628     FD = FD->getPreviousDecl();
11629   }
11630 }
11631 
11632 namespace {
11633 /// Cleanup action for doacross support.
11634 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11635 public:
11636   static const int DoacrossFinArgs = 2;
11637 
11638 private:
11639   llvm::FunctionCallee RTLFn;
11640   llvm::Value *Args[DoacrossFinArgs];
11641 
11642 public:
11643   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11644                     ArrayRef<llvm::Value *> CallArgs)
11645       : RTLFn(RTLFn) {
11646     assert(CallArgs.size() == DoacrossFinArgs);
11647     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11648   }
11649   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11650     if (!CGF.HaveInsertPoint())
11651       return;
11652     CGF.EmitRuntimeCall(RTLFn, Args);
11653   }
11654 };
11655 } // namespace
11656 
/// Emit the initialization of a doacross loop nest.
///
/// Builds a temporary array of `kmp_dim` records (one per entry of
/// \p NumIterations), fills in the upper bound and a stride of 1 for each
/// dimension, calls `__kmpc_doacross_init`, and pushes a cleanup that calls
/// `__kmpc_doacross_fini`. Does nothing when there is no insertion point.
///
/// \param CGF            Function being emitted.
/// \param D              Loop directive; its begin/end locations are used for
///                       the runtime calls.
/// \param NumIterations  Iteration-count expression for each dimension.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // The kmp_dim record type is built once and cached in KmpDimTy.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  // One kmp_dim element per loop dimension.
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  // Zero the whole array; the lower-bound field is not written below, so it
  // keeps this zero value.
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  // Field indices within kmp_dim, in declaration order.
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Register the matching __kmpc_doacross_fini call as a cleanup that runs on
  // both normal and exceptional exits.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
11727 
11728 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11729                                           const OMPDependClause *C) {
11730   QualType Int64Ty =
11731       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11732   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11733   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11734       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11735   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11736   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11737     const Expr *CounterVal = C->getLoopData(I);
11738     assert(CounterVal);
11739     llvm::Value *CntVal = CGF.EmitScalarConversion(
11740         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11741         CounterVal->getExprLoc());
11742     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11743                           /*Volatile=*/false, Int64Ty);
11744   }
11745   llvm::Value *Args[] = {
11746       emitUpdateLocation(CGF, C->getBeginLoc()),
11747       getThreadID(CGF, C->getBeginLoc()),
11748       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11749   llvm::FunctionCallee RTLFn;
11750   if (C->getDependencyKind() == OMPC_DEPEND_source) {
11751     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11752                                                   OMPRTL___kmpc_doacross_post);
11753   } else {
11754     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11755     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11756                                                   OMPRTL___kmpc_doacross_wait);
11757   }
11758   CGF.EmitRuntimeCall(RTLFn, Args);
11759 }
11760 
11761 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11762                                llvm::FunctionCallee Callee,
11763                                ArrayRef<llvm::Value *> Args) const {
11764   assert(Loc.isValid() && "Outlined function call location must be valid.");
11765   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11766 
11767   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11768     if (Fn->doesNotThrow()) {
11769       CGF.EmitNounwindRuntimeCall(Fn, Args);
11770       return;
11771     }
11772   }
11773   CGF.EmitRuntimeCall(Callee, Args);
11774 }
11775 
/// Emit a call to the outlined function \p OutlinedFn with \p Args.
/// This implementation simply forwards to emitCall().
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11781 
11782 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11783   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11784     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11785       HasEmittedDeclareTargetRegion = true;
11786 }
11787 
/// Return the address of the local variable for \p NativeParam in \p CGF.
/// \p TargetParam is not used by this implementation.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11793 
/// Compute the address to use for the local variable \p VD, taking untied
/// task storage and the OpenMP `allocate` attribute into account.
///
/// - Returns an invalid address when \p VD is null.
/// - When the canonical declaration carries OMPAllocateDeclAttr and
///   isAllocatableDecl(VD) holds, storage is obtained through
///   `__kmpc_alloc`, a cleanup calling `__kmpc_free` is pushed, and the
///   resulting address is returned.
/// - Otherwise returns the untied-task address recorded for \p VD in the
///   current function, which is invalid if none was recorded.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  // Look up the addresses recorded for VD if the current function has an
  // entry in the untied-task stack map.
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // Size is only known at run time: round it up with emitted arithmetic.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      // Size is a compile-time constant: round it up here.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is a enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    // Call __kmpc_alloc(thread id, size, allocator) and cast the returned
    // pointer to a pointer to the variable's type.
    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // When an untied-task address was recorded for the variable, store the
    // allocated pointer there.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      unsigned LocEncoding;
      Address Addr;
      const Expr *Allocator;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding,
                           Address Addr, const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      // Emits RTLFn(thread id, address as void*, allocator as void*), unless
      // there is no insertion point.
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is a enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    // Prefer the recorded "real" untied address when there is one; otherwise
    // use the freshly allocated pointer with the declaration's alignment.
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, AA->getAllocator());
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
11896 
11897 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
11898                                              const VarDecl *VD) const {
11899   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11900   if (It == FunctionToUntiedTaskStackMap.end())
11901     return false;
11902   return UntiedLocalVarsStack[It->second].count(VD) > 0;
11903 }
11904 
11905 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11906     CodeGenModule &CGM, const OMPLoopDirective &S)
11907     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11908   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11909   if (!NeedToPush)
11910     return;
11911   NontemporalDeclsSet &DS =
11912       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11913   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11914     for (const Stmt *Ref : C->private_refs()) {
11915       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11916       const ValueDecl *VD;
11917       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11918         VD = DRE->getDecl();
11919       } else {
11920         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11921         assert((ME->isImplicitCXXThis() ||
11922                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11923                "Expected member of current class.");
11924         VD = ME->getMemberDecl();
11925       }
11926       DS.insert(VD);
11927     }
11928   }
11929 }
11930 
11931 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11932   if (!NeedToPush)
11933     return;
11934   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11935 }
11936 
11937 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
11938     CodeGenFunction &CGF,
11939     const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>,
11940                          std::pair<Address, Address>> &LocalVars)
11941     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
11942   if (!NeedToPush)
11943     return;
11944   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
11945       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
11946   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
11947 }
11948 
11949 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
11950   if (!NeedToPush)
11951     return;
11952   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
11953 }
11954 
11955 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11956   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11957 
11958   return llvm::any_of(
11959       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11960       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
11961 }
11962 
11963 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11964     const OMPExecutableDirective &S,
11965     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11966     const {
11967   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11968   // Vars in target/task regions must be excluded completely.
11969   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11970       isOpenMPTaskingDirective(S.getDirectiveKind())) {
11971     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11972     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11973     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11974     for (const CapturedStmt::Capture &Cap : CS->captures()) {
11975       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11976         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11977     }
11978   }
11979   // Exclude vars in private clauses.
11980   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11981     for (const Expr *Ref : C->varlists()) {
11982       if (!Ref->getType()->isScalarType())
11983         continue;
11984       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11985       if (!DRE)
11986         continue;
11987       NeedToCheckForLPCs.insert(DRE->getDecl());
11988     }
11989   }
11990   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11991     for (const Expr *Ref : C->varlists()) {
11992       if (!Ref->getType()->isScalarType())
11993         continue;
11994       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11995       if (!DRE)
11996         continue;
11997       NeedToCheckForLPCs.insert(DRE->getDecl());
11998     }
11999   }
12000   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12001     for (const Expr *Ref : C->varlists()) {
12002       if (!Ref->getType()->isScalarType())
12003         continue;
12004       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12005       if (!DRE)
12006         continue;
12007       NeedToCheckForLPCs.insert(DRE->getDecl());
12008     }
12009   }
12010   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12011     for (const Expr *Ref : C->varlists()) {
12012       if (!Ref->getType()->isScalarType())
12013         continue;
12014       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12015       if (!DRE)
12016         continue;
12017       NeedToCheckForLPCs.insert(DRE->getDecl());
12018     }
12019   }
12020   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12021     for (const Expr *Ref : C->varlists()) {
12022       if (!Ref->getType()->isScalarType())
12023         continue;
12024       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12025       if (!DRE)
12026         continue;
12027       NeedToCheckForLPCs.insert(DRE->getDecl());
12028     }
12029   }
12030   for (const Decl *VD : NeedToCheckForLPCs) {
12031     for (const LastprivateConditionalData &Data :
12032          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12033       if (Data.DeclToUniqueName.count(VD) > 0) {
12034         if (!Data.Disabled)
12035           NeedToAddForLPCsAsDisabled.insert(VD);
12036         break;
12037       }
12038     }
12039   }
12040 }
12041 
12042 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12043     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12044     : CGM(CGF.CGM),
12045       Action((CGM.getLangOpts().OpenMP >= 50 &&
12046               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12047                            [](const OMPLastprivateClause *C) {
12048                              return C->getKind() ==
12049                                     OMPC_LASTPRIVATE_conditional;
12050                            }))
12051                  ? ActionToDo::PushAsLastprivateConditional
12052                  : ActionToDo::DoNotPush) {
12053   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12054   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12055     return;
12056   assert(Action == ActionToDo::PushAsLastprivateConditional &&
12057          "Expected a push action.");
12058   LastprivateConditionalData &Data =
12059       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12060   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12061     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12062       continue;
12063 
12064     for (const Expr *Ref : C->varlists()) {
12065       Data.DeclToUniqueName.insert(std::make_pair(
12066           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12067           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12068     }
12069   }
12070   Data.IVLVal = IVLVal;
12071   Data.Fn = CGF.CurFn;
12072 }
12073 
12074 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12075     CodeGenFunction &CGF, const OMPExecutableDirective &S)
12076     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12077   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12078   if (CGM.getLangOpts().OpenMP < 50)
12079     return;
12080   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12081   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12082   if (!NeedToAddForLPCsAsDisabled.empty()) {
12083     Action = ActionToDo::DisableLastprivateConditional;
12084     LastprivateConditionalData &Data =
12085         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12086     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12087       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12088     Data.Fn = CGF.CurFn;
12089     Data.Disabled = true;
12090   }
12091 }
12092 
12093 CGOpenMPRuntime::LastprivateConditionalRAII
12094 CGOpenMPRuntime::LastprivateConditionalRAII::disable(
12095     CodeGenFunction &CGF, const OMPExecutableDirective &S) {
12096   return LastprivateConditionalRAII(CGF, S);
12097 }
12098 
12099 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12100   if (CGM.getLangOpts().OpenMP < 50)
12101     return;
12102   if (Action == ActionToDo::DisableLastprivateConditional) {
12103     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12104            "Expected list of disabled private vars.");
12105     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12106   }
12107   if (Action == ActionToDo::PushAsLastprivateConditional) {
12108     assert(
12109         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12110         "Expected list of lastprivate conditional vars.");
12111     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12112   }
12113 }
12114 
12115 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12116                                                         const VarDecl *VD) {
12117   ASTContext &C = CGM.getContext();
12118   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
12119   if (I == LastprivateConditionalToTypes.end())
12120     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12121   QualType NewType;
12122   const FieldDecl *VDField;
12123   const FieldDecl *FiredField;
12124   LValue BaseLVal;
12125   auto VI = I->getSecond().find(VD);
12126   if (VI == I->getSecond().end()) {
12127     RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
12128     RD->startDefinition();
12129     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12130     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12131     RD->completeDefinition();
12132     NewType = C.getRecordType(RD);
12133     Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12134     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12135     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12136   } else {
12137     NewType = std::get<0>(VI->getSecond());
12138     VDField = std::get<1>(VI->getSecond());
12139     FiredField = std::get<2>(VI->getSecond());
12140     BaseLVal = std::get<3>(VI->getSecond());
12141   }
12142   LValue FiredLVal =
12143       CGF.EmitLValueForField(BaseLVal, FiredField);
12144   CGF.EmitStoreOfScalar(
12145       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12146       FiredLVal);
12147   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
12148 }
12149 
12150 namespace {
12151 /// Checks if the lastprivate conditional variable is referenced in LHS.
12152 class LastprivateConditionalRefChecker final
12153     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12154   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12155   const Expr *FoundE = nullptr;
12156   const Decl *FoundD = nullptr;
12157   StringRef UniqueDeclName;
12158   LValue IVLVal;
12159   llvm::Function *FoundFn = nullptr;
12160   SourceLocation Loc;
12161 
12162 public:
12163   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12164     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12165          llvm::reverse(LPM)) {
12166       auto It = D.DeclToUniqueName.find(E->getDecl());
12167       if (It == D.DeclToUniqueName.end())
12168         continue;
12169       if (D.Disabled)
12170         return false;
12171       FoundE = E;
12172       FoundD = E->getDecl()->getCanonicalDecl();
12173       UniqueDeclName = It->second;
12174       IVLVal = D.IVLVal;
12175       FoundFn = D.Fn;
12176       break;
12177     }
12178     return FoundE == E;
12179   }
12180   bool VisitMemberExpr(const MemberExpr *E) {
12181     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12182       return false;
12183     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12184          llvm::reverse(LPM)) {
12185       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12186       if (It == D.DeclToUniqueName.end())
12187         continue;
12188       if (D.Disabled)
12189         return false;
12190       FoundE = E;
12191       FoundD = E->getMemberDecl()->getCanonicalDecl();
12192       UniqueDeclName = It->second;
12193       IVLVal = D.IVLVal;
12194       FoundFn = D.Fn;
12195       break;
12196     }
12197     return FoundE == E;
12198   }
12199   bool VisitStmt(const Stmt *S) {
12200     for (const Stmt *Child : S->children()) {
12201       if (!Child)
12202         continue;
12203       if (const auto *E = dyn_cast<Expr>(Child))
12204         if (!E->isGLValue())
12205           continue;
12206       if (Visit(Child))
12207         return true;
12208     }
12209     return false;
12210   }
12211   explicit LastprivateConditionalRefChecker(
12212       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12213       : LPM(LPM) {}
12214   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12215   getFoundData() const {
12216     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12217   }
12218 };
12219 } // namespace
12220 
12221 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
12222                                                        LValue IVLVal,
12223                                                        StringRef UniqueDeclName,
12224                                                        LValue LVal,
12225                                                        SourceLocation Loc) {
12226   // Last updated loop counter for the lastprivate conditional var.
12227   // int<xx> last_iv = 0;
12228   llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
12229   llvm::Constant *LastIV =
12230       getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
12231   cast<llvm::GlobalVariable>(LastIV)->setAlignment(
12232       IVLVal.getAlignment().getAsAlign());
12233   LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
12234 
12235   // Last value of the lastprivate conditional.
12236   // decltype(priv_a) last_a;
12237   llvm::Constant *Last = getOrCreateInternalVariable(
12238       CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
12239   cast<llvm::GlobalVariable>(Last)->setAlignment(
12240       LVal.getAlignment().getAsAlign());
12241   LValue LastLVal =
12242       CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());
12243 
12244   // Global loop counter. Required to handle inner parallel-for regions.
12245   // iv
12246   llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
12247 
12248   // #pragma omp critical(a)
12249   // if (last_iv <= iv) {
12250   //   last_iv = iv;
12251   //   last_a = priv_a;
12252   // }
12253   auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
12254                     Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
12255     Action.Enter(CGF);
12256     llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
12257     // (last_iv <= iv) ? Check if the variable is updated and store new
12258     // value in global var.
12259     llvm::Value *CmpRes;
12260     if (IVLVal.getType()->isSignedIntegerType()) {
12261       CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
12262     } else {
12263       assert(IVLVal.getType()->isUnsignedIntegerType() &&
12264              "Loop iteration variable must be integer.");
12265       CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
12266     }
12267     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
12268     llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
12269     CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
12270     // {
12271     CGF.EmitBlock(ThenBB);
12272 
12273     //   last_iv = iv;
12274     CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
12275 
12276     //   last_a = priv_a;
12277     switch (CGF.getEvaluationKind(LVal.getType())) {
12278     case TEK_Scalar: {
12279       llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
12280       CGF.EmitStoreOfScalar(PrivVal, LastLVal);
12281       break;
12282     }
12283     case TEK_Complex: {
12284       CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
12285       CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
12286       break;
12287     }
12288     case TEK_Aggregate:
12289       llvm_unreachable(
12290           "Aggregates are not supported in lastprivate conditional.");
12291     }
12292     // }
12293     CGF.EmitBranch(ExitBB);
12294     // There is no need to emit line number for unconditional branch.
12295     (void)ApplyDebugLocation::CreateEmpty(CGF);
12296     CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
12297   };
12298 
12299   if (CGM.getLangOpts().OpenMPSimd) {
12300     // Do not emit as a critical region as no parallel region could be emitted.
12301     RegionCodeGenTy ThenRCG(CodeGen);
12302     ThenRCG(CGF);
12303   } else {
12304     emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
12305   }
12306 }
12307 
12308 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12309                                                          const Expr *LHS) {
12310   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12311     return;
12312   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12313   if (!Checker.Visit(LHS))
12314     return;
12315   const Expr *FoundE;
12316   const Decl *FoundD;
12317   StringRef UniqueDeclName;
12318   LValue IVLVal;
12319   llvm::Function *FoundFn;
12320   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12321       Checker.getFoundData();
12322   if (FoundFn != CGF.CurFn) {
12323     // Special codegen for inner parallel regions.
12324     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12325     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12326     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12327            "Lastprivate conditional is not found in outer region.");
12328     QualType StructTy = std::get<0>(It->getSecond());
12329     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12330     LValue PrivLVal = CGF.EmitLValue(FoundE);
12331     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12332         PrivLVal.getAddress(CGF),
12333         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
12334     LValue BaseLVal =
12335         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12336     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12337     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12338                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12339                         FiredLVal, llvm::AtomicOrdering::Unordered,
12340                         /*IsVolatile=*/true, /*isInit=*/false);
12341     return;
12342   }
12343 
12344   // Private address of the lastprivate conditional in the current context.
12345   // priv_a
12346   LValue LVal = CGF.EmitLValue(FoundE);
12347   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12348                                    FoundE->getExprLoc());
12349 }
12350 
12351 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12352     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12353     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12354   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12355     return;
12356   auto Range = llvm::reverse(LastprivateConditionalStack);
12357   auto It = llvm::find_if(
12358       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12359   if (It == Range.end() || It->Fn != CGF.CurFn)
12360     return;
12361   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12362   assert(LPCI != LastprivateConditionalToTypes.end() &&
12363          "Lastprivates must be registered already.");
12364   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12365   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12366   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12367   for (const auto &Pair : It->DeclToUniqueName) {
12368     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12369     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
12370       continue;
12371     auto I = LPCI->getSecond().find(Pair.first);
12372     assert(I != LPCI->getSecond().end() &&
12373            "Lastprivate must be rehistered already.");
12374     // bool Cmp = priv_a.Fired != 0;
12375     LValue BaseLVal = std::get<3>(I->getSecond());
12376     LValue FiredLVal =
12377         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12378     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12379     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12380     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12381     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12382     // if (Cmp) {
12383     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12384     CGF.EmitBlock(ThenBB);
12385     Address Addr = CGF.GetAddrOfLocalVar(VD);
12386     LValue LVal;
12387     if (VD->getType()->isReferenceType())
12388       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12389                                            AlignmentSource::Decl);
12390     else
12391       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12392                                 AlignmentSource::Decl);
12393     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12394                                      D.getBeginLoc());
12395     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12396     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12397     // }
12398   }
12399 }
12400 
12401 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12402     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12403     SourceLocation Loc) {
12404   if (CGF.getLangOpts().OpenMP < 50)
12405     return;
12406   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12407   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12408          "Unknown lastprivate conditional variable.");
12409   StringRef UniqueName = It->second;
12410   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12411   // The variable was not updated in the region - exit.
12412   if (!GV)
12413     return;
12414   LValue LPLVal = CGF.MakeAddrLValue(
12415       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
12416   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12417   CGF.EmitStoreOfScalar(Res, PrivLVal);
12418 }
12419 
12420 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
12421     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12422     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
12423   llvm_unreachable("Not supported in SIMD-only mode");
12424 }
12425 
12426 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
12427     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12428     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
12429   llvm_unreachable("Not supported in SIMD-only mode");
12430 }
12431 
12432 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
12433     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12434     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
12435     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
12436     bool Tied, unsigned &NumberOfParts) {
12437   llvm_unreachable("Not supported in SIMD-only mode");
12438 }
12439 
12440 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
12441                                            SourceLocation Loc,
12442                                            llvm::Function *OutlinedFn,
12443                                            ArrayRef<llvm::Value *> CapturedVars,
12444                                            const Expr *IfCond) {
12445   llvm_unreachable("Not supported in SIMD-only mode");
12446 }
12447 
12448 void CGOpenMPSIMDRuntime::emitCriticalRegion(
12449     CodeGenFunction &CGF, StringRef CriticalName,
12450     const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
12451     const Expr *Hint) {
12452   llvm_unreachable("Not supported in SIMD-only mode");
12453 }
12454 
12455 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
12456                                            const RegionCodeGenTy &MasterOpGen,
12457                                            SourceLocation Loc) {
12458   llvm_unreachable("Not supported in SIMD-only mode");
12459 }
12460 
12461 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
12462                                             SourceLocation Loc) {
12463   llvm_unreachable("Not supported in SIMD-only mode");
12464 }
12465 
12466 void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
12467     CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
12468     SourceLocation Loc) {
12469   llvm_unreachable("Not supported in SIMD-only mode");
12470 }
12471 
12472 void CGOpenMPSIMDRuntime::emitSingleRegion(
12473     CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
12474     SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
12475     ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
12476     ArrayRef<const Expr *> AssignmentOps) {
12477   llvm_unreachable("Not supported in SIMD-only mode");
12478 }
12479 
12480 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
12481                                             const RegionCodeGenTy &OrderedOpGen,
12482                                             SourceLocation Loc,
12483                                             bool IsThreads) {
12484   llvm_unreachable("Not supported in SIMD-only mode");
12485 }
12486 
12487 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
12488                                           SourceLocation Loc,
12489                                           OpenMPDirectiveKind Kind,
12490                                           bool EmitChecks,
12491                                           bool ForceSimpleCall) {
12492   llvm_unreachable("Not supported in SIMD-only mode");
12493 }
12494 
12495 void CGOpenMPSIMDRuntime::emitForDispatchInit(
12496     CodeGenFunction &CGF, SourceLocation Loc,
12497     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
12498     bool Ordered, const DispatchRTInput &DispatchValues) {
12499   llvm_unreachable("Not supported in SIMD-only mode");
12500 }
12501 
12502 void CGOpenMPSIMDRuntime::emitForStaticInit(
12503     CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
12504     const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
12505   llvm_unreachable("Not supported in SIMD-only mode");
12506 }
12507 
12508 void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
12509     CodeGenFunction &CGF, SourceLocation Loc,
12510     OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
12511   llvm_unreachable("Not supported in SIMD-only mode");
12512 }
12513 
12514 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
12515                                                      SourceLocation Loc,
12516                                                      unsigned IVSize,
12517                                                      bool IVSigned) {
12518   llvm_unreachable("Not supported in SIMD-only mode");
12519 }
12520 
12521 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
12522                                               SourceLocation Loc,
12523                                               OpenMPDirectiveKind DKind) {
12524   llvm_unreachable("Not supported in SIMD-only mode");
12525 }
12526 
12527 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
12528                                               SourceLocation Loc,
12529                                               unsigned IVSize, bool IVSigned,
12530                                               Address IL, Address LB,
12531                                               Address UB, Address ST) {
12532   llvm_unreachable("Not supported in SIMD-only mode");
12533 }
12534 
12535 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
12536                                                llvm::Value *NumThreads,
12537                                                SourceLocation Loc) {
12538   llvm_unreachable("Not supported in SIMD-only mode");
12539 }
12540 
12541 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
12542                                              ProcBindKind ProcBind,
12543                                              SourceLocation Loc) {
12544   llvm_unreachable("Not supported in SIMD-only mode");
12545 }
12546 
12547 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
12548                                                     const VarDecl *VD,
12549                                                     Address VDAddr,
12550                                                     SourceLocation Loc) {
12551   llvm_unreachable("Not supported in SIMD-only mode");
12552 }
12553 
12554 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
12555     const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
12556     CodeGenFunction *CGF) {
12557   llvm_unreachable("Not supported in SIMD-only mode");
12558 }
12559 
12560 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
12561     CodeGenFunction &CGF, QualType VarType, StringRef Name) {
12562   llvm_unreachable("Not supported in SIMD-only mode");
12563 }
12564 
12565 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
12566                                     ArrayRef<const Expr *> Vars,
12567                                     SourceLocation Loc,
12568                                     llvm::AtomicOrdering AO) {
12569   llvm_unreachable("Not supported in SIMD-only mode");
12570 }
12571 
12572 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
12573                                        const OMPExecutableDirective &D,
12574                                        llvm::Function *TaskFunction,
12575                                        QualType SharedsTy, Address Shareds,
12576                                        const Expr *IfCond,
12577                                        const OMPTaskDataTy &Data) {
12578   llvm_unreachable("Not supported in SIMD-only mode");
12579 }
12580 
12581 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
12582     CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
12583     llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
12584     const Expr *IfCond, const OMPTaskDataTy &Data) {
12585   llvm_unreachable("Not supported in SIMD-only mode");
12586 }
12587 
12588 void CGOpenMPSIMDRuntime::emitReduction(
12589     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
12590     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
12591     ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
12592   assert(Options.SimpleReduction && "Only simple reduction is expected.");
12593   CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
12594                                  ReductionOps, Options);
12595 }
12596 
12597 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
12598     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
12599     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
12600   llvm_unreachable("Not supported in SIMD-only mode");
12601 }
12602 
12603 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
12604                                                 SourceLocation Loc,
12605                                                 bool IsWorksharingReduction) {
12606   llvm_unreachable("Not supported in SIMD-only mode");
12607 }
12608 
12609 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
12610                                                   SourceLocation Loc,
12611                                                   ReductionCodeGen &RCG,
12612                                                   unsigned N) {
12613   llvm_unreachable("Not supported in SIMD-only mode");
12614 }
12615 
12616 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
12617                                                   SourceLocation Loc,
12618                                                   llvm::Value *ReductionsPtr,
12619                                                   LValue SharedLVal) {
12620   llvm_unreachable("Not supported in SIMD-only mode");
12621 }
12622 
12623 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
12624                                            SourceLocation Loc) {
12625   llvm_unreachable("Not supported in SIMD-only mode");
12626 }
12627 
12628 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
12629     CodeGenFunction &CGF, SourceLocation Loc,
12630     OpenMPDirectiveKind CancelRegion) {
12631   llvm_unreachable("Not supported in SIMD-only mode");
12632 }
12633 
12634 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
12635                                          SourceLocation Loc, const Expr *IfCond,
12636                                          OpenMPDirectiveKind CancelRegion) {
12637   llvm_unreachable("Not supported in SIMD-only mode");
12638 }
12639 
12640 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
12641     const OMPExecutableDirective &D, StringRef ParentName,
12642     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
12643     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
12644   llvm_unreachable("Not supported in SIMD-only mode");
12645 }
12646 
12647 void CGOpenMPSIMDRuntime::emitTargetCall(
12648     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12649     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
12650     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
12651     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
12652                                      const OMPLoopDirective &D)>
12653         SizeEmitter) {
12654   llvm_unreachable("Not supported in SIMD-only mode");
12655 }
12656 
12657 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
12658   llvm_unreachable("Not supported in SIMD-only mode");
12659 }
12660 
12661 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
12662   llvm_unreachable("Not supported in SIMD-only mode");
12663 }
12664 
12665 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
12666   return false;
12667 }
12668 
12669 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
12670                                         const OMPExecutableDirective &D,
12671                                         SourceLocation Loc,
12672                                         llvm::Function *OutlinedFn,
12673                                         ArrayRef<llvm::Value *> CapturedVars) {
12674   llvm_unreachable("Not supported in SIMD-only mode");
12675 }
12676 
12677 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
12678                                              const Expr *NumTeams,
12679                                              const Expr *ThreadLimit,
12680                                              SourceLocation Loc) {
12681   llvm_unreachable("Not supported in SIMD-only mode");
12682 }
12683 
12684 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
12685     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12686     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
12687   llvm_unreachable("Not supported in SIMD-only mode");
12688 }
12689 
12690 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
12691     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12692     const Expr *Device) {
12693   llvm_unreachable("Not supported in SIMD-only mode");
12694 }
12695 
12696 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12697                                            const OMPLoopDirective &D,
12698                                            ArrayRef<Expr *> NumIterations) {
12699   llvm_unreachable("Not supported in SIMD-only mode");
12700 }
12701 
12702 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12703                                               const OMPDependClause *C) {
12704   llvm_unreachable("Not supported in SIMD-only mode");
12705 }
12706 
12707 const VarDecl *
12708 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
12709                                         const VarDecl *NativeParam) const {
12710   llvm_unreachable("Not supported in SIMD-only mode");
12711 }
12712 
12713 Address
12714 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
12715                                          const VarDecl *NativeParam,
12716                                          const VarDecl *TargetParam) const {
12717   llvm_unreachable("Not supported in SIMD-only mode");
12718 }
12719